In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Load dataset (replace with actual weather data)
# The raw frame is kept untouched so this step can be re-run without re-reading
# already-encoded columns.
raw_data = pd.read_csv("weather_data_200_calls.csv")  # Ensure the dataset contains relevant features like temperature, humidity, etc.


from sklearn.preprocessing import StandardScaler, LabelEncoder


# Example layout given in the original notes (confirm against the actual CSV):
# Columns = ["City", "Temperature", "Humidity", "Wind Speed", "Pressure", "Weather Description"]

# Encode categorical columns.
# Each column gets its own encoder: re-fitting a single LabelEncoder on a second
# column discards the mapping learned for the first one, so the City codes could
# no longer be translated back to (or from) city names.
data = raw_data.copy()  # Work on a copy of the data
city_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; reused below to decode predictions
data["City"] = city_encoder.fit_transform(data["City"])
data["Weather Description"] = label_encoder.fit_transform(data["Weather Description"])

# Preprocessing
X = data.drop(columns=["Weather Description"])  # Keep features
y = data["Weather Description"]  # Target variable

# Train-test split is done BEFORE scaling so that the scaler's mean/variance are
# estimated from training rows only (fitting on all rows leaks test statistics).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler; kept under the
# original name for the shape report below.
X_scaled = scaler.transform(X)

# Output shapes for verification
print("Features (X) shape:", X_scaled.shape)
print("Target (y) shape:", y.shape)



# Candidate estimators, keyed by the name shown in the evaluation report.
# NOTE(review): y_train holds label-encoded weather descriptions, so the error
# metrics below treat arbitrary category codes as numeric values — consider a
# classifier for this target.
models = {}
models["Random Forest"] = RandomForestRegressor(n_estimators=100, random_state=42)
models["Linear Regression"] = LinearRegression()

# Report label -> scoring function, each called as fn(y_true, y_pred).
metric_functions = {
    "MAE": mean_absolute_error,
    "MSE": mean_squared_error,
    "R2": r2_score,
}

# Fit every candidate on the training split and score it on the held-out split.
results = {}
for model_name, model in models.items():
    predictions = model.fit(X_train, y_train).predict(X_test)
    results[model_name] = {
        label: score_fn(y_test, predictions)
        for label, score_fn in metric_functions.items()
    }

# One report line per model: "<name>: {metric: value, ...}".
print("Model Evaluation Results:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics}")

# Predict current weather (use real-time features as input).
# The sample row must supply one value per column of X, in the same column order;
# the DataFrame constructor below fails if the count does not match.
feature_columns = X.columns  # Retrieve column names from the original dataset
real_time_features = np.array([[0, 21, 85, 12, 1013]])  # Replace with actual values and include all features
real_time_features_df = pd.DataFrame(real_time_features, columns=feature_columns)

# Scale the features with the scaler fitted during preprocessing
real_time_features_scaled = scaler.transform(real_time_features_df)

# Predict current weather
best_model = models["Random Forest"]  # Replace with the best-performing model
current_weather_prediction = best_model.predict(real_time_features_scaled)

# Decode the prediction back to a weather description.
# A regressor returns continuous values, so the output is rounded to the nearest
# class code and clipped into the encoder's valid range: inverse_transform
# rejects codes it was not fitted with, which an extrapolating model (e.g. the
# linear one) can produce.
highest_code = len(label_encoder.classes_) - 1
predicted_codes = np.clip(np.round(current_weather_prediction), 0, highest_code).astype(int)
decoded_prediction = label_encoder.inverse_transform(predicted_codes)

print(f"Predicted Current Weather (decoded): {decoded_prediction}")



Features (X) shape: (200, 5)
Target (y) shape: (200,)
Model Evaluation Results:
Random Forest: {'MAE': 0.00425000000000002, 'MSE': 0.00012250000000000143, 'R2': 0.9999768431001891}
Linear Regression: {'MAE': 1.4671826223589002, 'MSE': 2.9765371853392812, 'R2': 0.4373275642080753}
Predicted Current Weather (decoded): ['clear sky']


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # Change to classifier for classification tasks
from sklearn.linear_model import LogisticRegression  # You can use any classifier
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, LabelEncoder

import joblib  # used at the end of this cell to persist the fitted artifacts

# Load dataset (replace with actual weather data)
# The raw frame is kept untouched so the encoding step below can be re-run
# without re-reading already-encoded columns.
raw_data = pd.read_csv("weather_data_200_calls.csv")  # Ensure the dataset contains relevant features like temperature, humidity, etc.

# Encode categorical columns.
# Each column gets its own encoder: re-fitting one LabelEncoder on a second
# column overwrites the mapping learned for the first, which would leave the
# City mapping unavailable when the artifacts are saved below.
data = raw_data.copy()  # Work on a copy of the data
city_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; decodes predictions below
data["City"] = city_encoder.fit_transform(data["City"])
data["Weather Description"] = label_encoder.fit_transform(data["Weather Description"])

# Preprocessing
X = data.drop(columns=["Weather Description"])  # Keep features
y = data["Weather Description"]  # Target variable

# Train-test split is done BEFORE scaling so that the scaler's mean/variance are
# estimated from training rows only (fitting on all rows leaks test statistics).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler; kept under the
# original name for the shape report below.
X_scaled = scaler.transform(X)

# Output shapes for verification
print("Features (X) shape:", X_scaled.shape)
print("Target (y) shape:", y.shape)

# Models to train
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
}

# Train each model on the training split and score it on the held-out split
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Accuracy is reported as a percentage
    accuracy = accuracy_score(y_test, predictions) * 100
    results[model_name] = {"Accuracy": accuracy}

# Display results
print("Model Evaluation Results:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics}")

# Predict current weather (use real-time features as input).
# The sample row must supply one value per column of X, in the same column order;
# categorical inputs must already be encoded (City via city_encoder).
feature_columns = X.columns  # Retrieve column names from the original dataset
real_time_features = np.array([[0, 21, 85, 12, 1013]])  # Replace with actual values and include all features
real_time_features_df = pd.DataFrame(real_time_features, columns=feature_columns)

# Scale the features with the scaler fitted on the training split
real_time_features_scaled = scaler.transform(real_time_features_df)

# Predict current weather
best_model = models["Random Forest"]  # Replace with the best-performing model
current_weather_prediction = best_model.predict(real_time_features_scaled)

# Decode the prediction. The classifier already returns encoded class labels,
# so they go to the encoder directly (no rounding needed).
decoded_prediction = label_encoder.inverse_transform(current_weather_prediction)

print(f"Predicted Current Weather (decoded): {decoded_prediction}")

# Save the best model (Random Forest in this case)
joblib.dump(best_model, "random_forest_weather_model.pkl")

# Save the scaler and BOTH encoders: inference needs the City mapping to build
# the input row and the target mapping to decode the output.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(city_encoder, "city_encoder.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")


Features (X) shape: (200, 5)
Target (y) shape: (200,)
Model Evaluation Results:
Random Forest: {'Accuracy': 100.0}
Logistic Regression: {'Accuracy': 77.5}
Predicted Current Weather (decoded): ['broken clouds']


['label_encoder.pkl']