In [1]:
# 3. Classification:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Single seed shared by the train/test split and by every estimator that
# draws random numbers while fitting, so the reported metrics are
# reproducible from run to run.
RANDOM_STATE = 42

# Load the dataset from the working directory.
data = pd.read_csv('dataset.csv')

# Encode the categorical 'conditions' column as integer class ids.
# The fitted encoder is kept so ids can be mapped back to the original
# labels later with encoder.inverse_transform(...).
encoder = LabelEncoder()
data['conditions'] = encoder.fit_transform(data['conditions'])

# Define features and target variable.
features = data[['tempmax', 'tempmin', 'humidity', 'windspeed']]
labels = data['conditions']

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=RANDOM_STATE
)

# Standardise the features. The scaler is fitted on the training rows only
# and then applied to the test rows, so no test-set statistics leak into
# the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Project the scaled features onto 2 principal components; as with the
# scaler, the projection is learned from the training rows only.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Models to evaluate, keyed by the display name used in the results table.
# DecisionTreeClassifier and MLPClassifier are randomised during fitting,
# so they receive the shared seed; without it their scores change on every
# run even though the split itself is fixed.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(),
    "Linear SVM": SVC(kernel='linear'),
    "Kernel SVM": SVC(kernel='rbf'),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Basic Neural Network": MLPClassifier(max_iter=1000, random_state=RANDOM_STATE),
}

# Train every configured model on one feature representation and score it
def evaluate_models(X_train, X_test, y_train, y_test, dataset_name):
    """Fit and score each estimator in the module-level ``models`` dict.

    Every estimator is fitted in place on ``(X_train, y_train)`` and then
    used to predict ``X_test``. Precision, recall and F1 are computed with
    ``average='weighted'`` and ``zero_division=0``; accuracy is the plain
    ``accuracy_score``.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels the predictions are compared against.
        dataset_name: Tag appended in parentheses to each model name so
            rows from different feature sets can be told apart.

    Returns:
        A ``pandas.DataFrame`` with one row per model and the columns
        ``Model``, ``Precision``, ``Recall``, ``F1-score``, ``Accuracy``.
    """
    column_order = ['Model', 'Precision', 'Recall', 'F1-score', 'Accuracy']
    # Keyword arguments shared by the three averaged metrics.
    averaged = {'average': 'weighted', 'zero_division': 0}

    rows = []
    for label, estimator in models.items():
        estimator.fit(X_train, y_train)
        predicted = estimator.predict(X_test)

        rows.append({
            'Model': label + f" ({dataset_name})",
            'Precision': precision_score(y_test, predicted, **averaged),
            'Recall': recall_score(y_test, predicted, **averaged),
            'F1-score': f1_score(y_test, predicted, **averaged),
            'Accuracy': accuracy_score(y_test, predicted),
        })

    return pd.DataFrame(rows, columns=column_order)

# Score all models on the scaled features (no PCA applied).
original_results = evaluate_models(
    X_train_scaled, X_test_scaled, y_train, y_test, 'Original'
)

# Score the same models on the 2-component PCA projection.
pre_processed_results = evaluate_models(
    X_train_pca, X_test_pca, y_train, y_test, 'Pre-processed'
)

# Stack both result tables into one, renumbering the rows from 0.
final_results = pd.concat(
    [original_results, pre_processed_results],
    ignore_index=True,
)

# Show the combined comparison table.
print(final_results)

                                   Model  Precision    Recall  F1-score  \
0         Logistic Regression (Original)   0.577768  0.646084  0.607757   
1                 Naive Bayes (Original)   0.553700  0.569277  0.558712   
2                         KNN (Original)   0.578512  0.625000  0.595520   
3                  Linear SVM (Original)   0.577485  0.646084  0.605298   
4                  Kernel SVM (Original)   0.570782  0.641566  0.596799   
5               Decision Tree (Original)   0.566016  0.572289  0.568392   
6        Basic Neural Network (Original)   0.633334  0.652108  0.617279   
7    Logistic Regression (Pre-processed)   0.478382  0.546687  0.457064   
8            Naive Bayes (Pre-processed)   0.489526  0.542169  0.413839   
9                    KNN (Pre-processed)   0.434664  0.484940  0.449558   
10            Linear SVM (Pre-processed)   0.282627  0.531627  0.369054   
11            Kernel SVM (Pre-processed)   0.495465  0.558735  0.461623   
12         Decision Tree 