In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn




In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error, silhouette_score
import pickle


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive

# The dataset lives on Google Drive, so the drive has to be mounted into the
# Colab filesystem before the CSV can be read.
drive.mount('/content/drive')

# Read the weather observations into a DataFrame and echo it (pandas
# truncates the printed frame to its first and last rows).
df = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/Weatherset/weather.csv')
print(df)

Mounted at /content/drive
            date  precipitation  temp_max  temp_min  wind  weather
0     2012-01-01            0.0      12.8       5.0   4.7  drizzle
1     2012-01-02           10.9      10.6       2.8   4.5     rain
2     2012-01-03            0.8      11.7       7.2   2.3     rain
3     2012-01-04           20.3      12.2       5.6   4.7     rain
4     2012-01-05            1.3       8.9       2.8   6.1     rain
...          ...            ...       ...       ...   ...      ...
1456  2015-12-27            8.6       4.4       1.7   2.9     rain
1457  2015-12-28            1.5       5.0       1.7   1.3     rain
1458  2015-12-29            0.0       7.2       0.6   2.6      fog
1459  2015-12-30            0.0       5.6      -1.0   3.4      sun
1460  2015-12-31            0.0       5.6      -2.1   3.5      sun

[1461 rows x 6 columns]


In [None]:
# Impute gaps in the numeric columns with each column's own mean;
# non-numeric columns are not touched by this fill.
df = df.fillna(df.mean(numeric_only=True))

# Turn the textual weather label into integer codes. The fitted encoder `le`
# is kept around so codes can be mapped back to labels later.
le = LabelEncoder()
df['weather_encoded'] = le.fit_transform(df['weather'])

# Column names consumed by the modelling cells below.
features = [
    'precipitation',
    'temp_max',
    'temp_min',
    'wind',
    'weather_encoded',
]

In [None]:
# Standardise the feature columns before clustering so that no single column
# dominates the distance computation purely because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[features])

# NOTE(review): `features` contains the label-encoded weather column. Those
# integer codes are nominal, yet KMeans treats their numeric distance as
# meaningful -- confirm this is intended.
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster'] = kmeans.fit_predict(X_scaled)

# Evaluate clustering
score = silhouette_score(X_scaled, df['cluster'])
print("Silhouette Score:", score)

# Save scaler & clustering model. Context managers guarantee the files are
# flushed and closed (the bare open() calls previously leaked the handles).
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
with open('kmeans_model.pkl', 'wb') as kmeans_file:
    pickle.dump(kmeans, kmeans_file)

Silhouette Score: 0.3080795370584519


In [None]:
# NOTE(review): target leakage. `features` includes 'weather_encoded', which is
# also the target `y`, so the classifier can simply read the answer from its
# input and the accuracy printed below is not a meaningful estimate.
# The column is kept here only because predict_weather() feeds all five
# features to the saved classifier; removing it requires changing both places.
X = df[features]
y = df['weather_encoded']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print("Classification Accuracy:", accuracy_score(y_test, y_pred))

# Save classifier. The context manager closes the file deterministically
# (the bare open() call previously leaked the handle).
with open('weather_classifier.pkl', 'wb') as clf_file:
    pickle.dump(clf, clf_file)

Classification Accuracy: 1.0


In [None]:
# Predict the daily maximum temperature from precipitation, minimum
# temperature and wind. Note that this rebinds X / y and the train/test
# splits created by the classification cell.
X = df[['precipitation','temp_min','wind']]
y = df['temp_max']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = RandomForestRegressor(random_state=42)
reg.fit(X_train, y_train)

# Root-mean-squared error on the held-out rows (same unit as temp_max).
y_pred = reg.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Regression RMSE:", rmse)

# Save regressor. The context manager closes the file deterministically
# (the bare open() call previously leaked the handle).
with open('temperature_regressor.pkl', 'wb') as reg_file:
    pickle.dump(reg, reg_file)

Regression RMSE: 3.094375472791224


In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load artifacts that
    # this notebook itself produced.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def predict_weather(input_data):
    """
    Run one observation through the saved clustering, classification and
    regression models.

    input_data: A list containing [precipitation, temp_max, temp_min, wind, weather_encoded]
    in that specific order.

    Returns a dict with:
        'cluster'               -- KMeans cluster index as a plain int
        'weather_type'          -- weather label decoded from the classifier output
        'predicted_temperature' -- regressor output rounded to 2 decimals, plain float

    Raises ValueError if input_data does not contain exactly 5 values.

    Relies on the module-level LabelEncoder `le` to decode the predicted class,
    so `le` must still be the encoder fitted on the training labels when this
    function is called.

    NOTE(review): the classifier receives `weather_encoded` -- the very value it
    predicts -- as an input feature, so 'weather_type' echoes the input rather
    than being an independent prediction.
    """
    # Validate before touching the disk so a malformed call fails fast with the
    # intended error instead of whatever the model loading might raise.
    if len(input_data) != 5:
        raise ValueError("Input data must contain 5 features: [precipitation, temp_max, temp_min, wind, weather_encoded]")

    # Load models
    scaler = _load_pickle('scaler.pkl')
    clf = _load_pickle('weather_classifier.pkl')
    reg = _load_pickle('temperature_regressor.pkl')
    kmeans = _load_pickle('kmeans_model.pkl')

    # Clustering works on the standardised version of all 5 features.
    scaled_features = scaler.transform([input_data])
    cluster = kmeans.predict(scaled_features)[0]

    # Classification uses the raw (unscaled) 5 features.
    weather_class_encoded = clf.predict([input_data])[0]
    weather_class = le.inverse_transform([weather_class_encoded])[0]

    # Regression (temperature prediction): the regressor was trained on
    # ['precipitation', 'temp_min', 'wind'], i.e. positions 0, 2 and 3.
    regressor_input = [input_data[0], input_data[2], input_data[3]]
    temp_pred = reg.predict([regressor_input])[0]

    return {
        'cluster': int(cluster),
        'weather_type': weather_class,
        # Convert to a builtin float so the result does not carry a numpy
        # scalar (it previously printed as np.float64(...)).
        'predicted_temperature': round(float(temp_pred), 2)
    }

# Demo call, using the first row of the dataset (df.iloc[0]) as input.
sample_input = [
    0.0,   # precipitation
    12.8,  # temp_max
    5.0,   # temp_min
    4.7,   # wind
    0,     # weather_encoded (0 corresponds to 'drizzle')
]
prediction = predict_weather(sample_input)
print(prediction)

{'cluster': 1, 'weather_type': 'drizzle', 'predicted_temperature': np.float64(12.06)}




In [None]:
from google.colab import files

# The training cells above write these artifacts; run them first so the
# files exist before the browser download is triggered.
for artifact in (
    'weather_classifier.pkl',
    'kmeans_model.pkl',
    'scaler.pkl',
    'temperature_regressor.pkl',
):
    files.download(artifact)

# Left disabled: these files were not explicitly saved in the notebook
# up to this point.
# files.download('weather_model.pkl')
# files.download('label_encoder.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pickle
from sklearn.preprocessing import LabelEncoder

# Persist the LabelEncoder that was fitted on df['weather'] during
# preprocessing. Creating a fresh encoder here (e.g. fitted on placeholder
# labels) would both save classes that do not match the trained classifier
# and rebind the global `le` that predict_weather() uses for decoding.
assert isinstance(le, LabelEncoder) and hasattr(le, 'classes_'), \
    "Run the preprocessing cell first so `le` is the fitted weather encoder"

# Save
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

# Download to local
from google.colab import files
files.download('label_encoder.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>