In [1]:
import os
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Folder that receives the evaluation spreadsheet written at the end of the run.
output_dir = '../Chanae-Graph/2021/regression'
# Folder whose Excel workbooks are read, one regression per workbook.
target = '../Chanae-regression'

In [3]:
# Create the output folder (including any missing parent folders) up front.
# exist_ok=True makes re-running this cell harmless when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Analysis window: starts on 2021-03-22 and ends 35 days (five weeks) later.
# Both bounds are pandas Timestamps so they can be compared against a datetime column.
start_date = pd.to_datetime('2021-03-22')
end_date = start_date + pd.Timedelta(weeks=5)

In [5]:
# Fit one linear regression per workbook found in `target`, score it on a
# held-out 20% split, print the metrics, and save all of them to one spreadsheet.
evaluation_results = []

# Regressor columns; 'disease' is the column being predicted.
feature_columns = ['uvb', 'd2m', 'RH', 'minTemp', 'maxTemp', 'meanTemp', 'tp', 'WS']

# os.listdir returns every directory entry in arbitrary order. Keep only Excel
# workbooks, skip Office lock files ("~$name.xlsx") that appear while a workbook
# is open, and sort so the results table has a reproducible row order.
excel_files = sorted(
    name for name in os.listdir(target)
    if name.lower().endswith(('.xlsx', '.xls')) and not name.startswith('~$')
)

for file in excel_files:
    df = pd.read_excel(os.path.join(target, file))
    df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d')

    # Filter data within the specified date range (both bounds inclusive)
    in_window = (df['date'] >= start_date) & (df['date'] <= end_date)
    filtered_data = df[in_window].copy()

    # train_test_split needs at least one training and one test row; report and
    # skip workbooks without enough data instead of aborting the whole batch.
    if len(filtered_data) < 2:
        print(f"File: {file}")
        print(f"Skipped: only {len(filtered_data)} row(s) between "
              f"{start_date.date()} and {end_date.date()}")
        continue

    # Prepare data for regression
    X = filtered_data[feature_columns]
    y = filtered_data['disease']

    # Split data into training and testing sets (fixed seed for repeatable splits)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Perform linear regression on the training data
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate performance metrics on the test set
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Store the evaluation results
    evaluation_results.append({
        'File': file,
        'Mean Squared Error': mse,
        'R^2 Score': r2,
    })

    print(f"File: {file}")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

# Save the evaluation results to an Excel file
evaluation_df = pd.DataFrame(evaluation_results)
evaluation_df_path = os.path.join(output_dir, 'evaluation-results.xlsx')
evaluation_df.to_excel(evaluation_df_path, index=False, engine='openpyxl')


File: Chanae-regression.xlsx
Mean Squared Error: 1335874.7048121435
R^2 Score: 0.05491895202901931
File: Chang-Phueak-regression.xlsx
Mean Squared Error: 1745207.7708521725
R^2 Score: -0.23466877773990857
File: Du-Song-Yo-regression.xlsx
Mean Squared Error: 1329924.7703658112
R^2 Score: 0.05912830659022361
File: Phadung-Mat-regression.xlsx
Mean Squared Error: 1496143.7809899824
R^2 Score: -0.058465384036235246
