In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Directory whose entries are listed and read with pd.read_excel by the
# regression loop further down. The path is relative to the notebook's
# working directory.
target = '../Satun-Phang-Nga/Phang-Nga-regression'
# Disabled setting: an `output` directory is not used anywhere in the visible cells.
# output = '../Satun-Phang-Nga/Phang-Nga-regression'

In [4]:
# Predictor columns; each one is regressed on its own against the 'disease'
# column of every input workbook.
columns_to_process = [
    'uvb',
    'd2m',
    'RH',
    'minTemp',
    'maxTemp',
    'meanTemp',
    'tp',
    'WS',
    'durationOfDay',
]

# One dict of metrics is appended here per (file, column) regression.
results = list()

In [5]:
# Fit one single-feature LinearRegression per (file, column) pair and record
# its hold-out MSE and R-squared in `results`.

# Start from an empty list so that re-running this cell does not append a
# second copy of every row to the results from a previous run.
results = []

# File extensions that pd.read_excel can open.
excel_suffixes = ('.xls', '.xlsx', '.xlsm', '.xlsb', '.ods')

# os.listdir returns entries in arbitrary order; sort them so the order of the
# result rows is reproducible between runs and machines.
for file in sorted(os.listdir(target)):
    path = os.path.join(target, file)

    # Skip sub-folders (e.g. .ipynb_checkpoints), non-Excel files and the
    # "~$name.xlsx" lock files Office creates while a workbook is open; any of
    # these would make pd.read_excel raise and abort the whole run.
    if (not os.path.isfile(path)
            or file.startswith('~$')
            or not file.lower().endswith(excel_suffixes)):
        continue

    df = pd.read_excel(path)

    # Without the target column nothing in this file can be fitted; report it
    # once instead of failing with a KeyError on the first predictor.
    if 'disease' not in df.columns:
        print(f"Column disease not found in file {file}")
        continue

    for column in columns_to_process:
        if column not in df.columns:
            print(f"Column {column} not found in file {file}")
            continue

        # LinearRegression does not accept NaN, so keep only the rows where
        # both the predictor and the target are present.
        pair = df[[column, 'disease']].dropna()

        # train_test_split needs at least two samples to produce a non-empty
        # train set and a non-empty test set.
        if len(pair) < 2:
            print(f"Not enough complete rows for column {column} in file {file}")
            continue

        X = pair[[column]].to_numpy()  # shape (n_samples, 1), as sklearn expects
        y = pair['disease']

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        model = LinearRegression()
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results.append({
            # Keep at most the first eight dash-separated parts of the file name.
            'File': '-'.join(file.split('-')[:8]),
            'Column': column,
            'MSE': mse,
            'R-squared': r2
        })

In [6]:
# Destination workbook, written relative to the current working directory.
output_file = "Phang-Nga-regression-results.xlsx"

# Turn the list of metric records into a table and save it without the
# positional index column.
results_df = pd.DataFrame(data=results)
results_df.to_excel(excel_writer=output_file, index=False)

print(f"Results exported to {output_file}")

Results exported to Phang-Nga-regression-results.xlsx
