# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the dataset
data = pd.read_csv('sam.csv')  # Adjust path as needed

# 2. Preprocess the data (if necessary)
# Assumes the dataset is clean and contains the 'temp' and 'humidity' feature
# columns and the 'weather_type' target column used below.

# 3. Split the data into training and testing sets
X = data[['temp', 'humidity']]  # Features
y = data['weather_type']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Scale the data
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Train decision tree, random forest, and ensemble models
# Every model is fitted on the SAME scaled features that are used for
# evaluation in step 6. Fitting the tree-based models on the raw features and
# then predicting on scaled features would apply split thresholds learned in
# raw units to standardized values and produce meaningless scores.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train_scaled, y_train)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled, y_train)

# Logistic Regression with increased max_iter, trained on scaled data
lr_classifier = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier.fit(X_train_scaled, y_train)

# SVC trained on scaled data; probability=True is required so it can take
# part in soft voting below.
svc_classifier = SVC(probability=True, random_state=42)
svc_classifier.fit(X_train_scaled, y_train)

# Soft-voting ensemble over the four base estimators.
ensemble_clf = VotingClassifier(estimators=[
    ('dt', dt_classifier),
    ('rf', rf_classifier),
    ('lr', lr_classifier),
    ('svc', svc_classifier)], voting='soft')

ensemble_clf.fit(X_train_scaled, y_train)

# 6. Evaluate the models
models = {
    "Decision Tree": dt_classifier,
    "Random Forest": rf_classifier,
    "Logistic Regression": lr_classifier,
    "SVC": svc_classifier,
    "Ensemble Model": ensemble_clf
}

for name, model in models.items():
    # All models were trained on scaled features, so predict on scaled features.
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Classification Report:\n{report}\n")
