In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score, jaccard_score, f1_score

# Updated sample dataset: five daily weather observations for one location.
data = {
    'Date': ['01/12/08', '02/12/08', '03/12/08', '04/12/08', '05/12/08'],
    'Location': ['Albury', 'Albury', 'Albury', 'Albury', 'Albury'],
    'MinTemp': [13.4, 7.4, 12.9, 9.2, 17.5],
    'MaxTemp': [22.9, 25.1, 25.7, 28.0, 32.3],
    'Rainfall': [0.6, 0.0, 0.0, 0.0, 1.0],
    'RainTomorrow': ['No', 'No', 'Yes', 'No', 'Yes']
}

df = pd.DataFrame(data)


# Drop Date and Location columns so only the numeric measurements remain
# as candidate features.
df.drop(['Date', 'Location'], axis=1, inplace=True)

# Convert RainTomorrow column to binary (No -> 0, Yes -> 1)
df['RainTomorrow'] = df['RainTomorrow'].map({'No': 0, 'Yes': 1})

# Split dataset into features and target variable
X = df.drop(['RainTomorrow'], axis=1)
y = df['RainTomorrow']

# Split dataset into training and testing sets.
# With 5 rows and test_size=0.2 the test set holds a single observation,
# so every metric below is computed from one prediction.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models. n_neighbors is capped at the number of training rows
# because KNN cannot use more neighbours than it has training samples.
models = {
    'Linear Regression': LinearRegression(),
    'KNN': KNeighborsRegressor(n_neighbors=min(5, len(X_train))),
    'Decision Trees': DecisionTreeRegressor(),
    'Logistic Regression': LogisticRegression(max_iter=10000),
    'SVM': SVR()
}


def _regression_metrics(y_true, y_pred):
    """Return MAE, MSE and R2 for the given predictions.

    R2 is not well-defined for fewer than two samples; ``r2_score`` warns
    and yields NaN in that situation, so NaN is reported directly instead
    of calling it.
    """
    if len(y_true) >= 2:
        r2 = r2_score(y_true, y_pred)
    else:
        r2 = float('nan')
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'MSE': mean_squared_error(y_true, y_pred),
        'R2': r2
    }


def _classification_metrics(y_true, y_pred, include_jaccard=True):
    """Round regressor outputs to integer labels and score them.

    Returns Accuracy, optionally Jaccard, and F1-Score (in that order).
    ``zero_division=0`` makes Jaccard/F1 report 0 instead of warning when
    there are no positive labels or predictions.
    """
    labels = np.round(y_pred).astype(int)
    scores = {'Accuracy': accuracy_score(y_true, labels)}
    if include_jaccard:
        scores['Jaccard'] = jaccard_score(y_true, labels, zero_division=0)
    scores['F1-Score'] = f1_score(y_true, labels, zero_division=0)
    return scores


# Results dictionary: model name -> {metric name -> value}
results = {}

# Train and evaluate models
for name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # NOTE(review): LogisticRegression is a classifier but is scored with
    # the regression metrics here, while the KNN and tree regressors are
    # scored as classifiers - confirm this grouping is intended.
    if isinstance(model, (LinearRegression, LogisticRegression, SVR)):
        results[name] = _regression_metrics(y_test, predictions)
    elif isinstance(model, KNeighborsRegressor):
        results[name] = _classification_metrics(y_test, predictions)
    elif isinstance(model, DecisionTreeRegressor):
        results[name] = _classification_metrics(
            y_test, predictions, include_jaccard=False
        )
    else:
        # Fail loudly rather than silently omitting a model from the report.
        raise TypeError(f"No evaluation rule for model type {type(model).__name__}")

# Display results
for name, metrics in results.items():
    print(f"{name}:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")
    print("-" * 40)


Linear Regression:
MAE: 0.6780536246276068
MSE: 0.45975671787063543
R2: nan
----------------------------------------
KNN:
Accuracy: 1.0
Jaccard: 0.0
F1-Score: 0.0
----------------------------------------
Decision Trees:
Accuracy: 1.0
F1-Score: 0.0
----------------------------------------
Logistic Regression:
MAE: 0.0
MSE: 0.0
R2: nan
----------------------------------------
SVM:
MAE: 0.15332698982954873
MSE: 0.02350916581019054
R2: nan
----------------------------------------


