In [15]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the CSV file
data = pd.read_csv("./HCM_Dataset.csv")

# Function to create list
def Create_list(x):
    list_of_lists = [w.split() for w in x.split(',')]
    flat_list = [item for sublist in list_of_lists for item in sublist]
    return flat_list

# Function to get weather
def Get_Weather(list1):
    if 'Cloudy' in list1:
        return 'CLOUDY'
    elif 'Rainy' in list1:
        return 'RAINY'
    elif 'Sunny' in list1:
        return 'SUNNY'
    elif 'Clear' in list1:
        return 'CLEAR'

# Apply functions to create standardized weather column
data['Std_Weather'] = data['Weather'].apply(lambda x: Get_Weather(Create_list(x)))

# Sample data for balanced classes
cloudy_df = data[data['Std_Weather'] == 'CLOUDY'].sample(1500)
rainy_df = data[data['Std_Weather'] == 'RAINY'].sample(1500)
clear_df = data[data['Std_Weather'] == 'CLEAR'].sample(1500)
sunny_df = data[data['Std_Weather'] == 'SUNNY']

# Concatenate sampled dataframes
weather_df = pd.concat([cloudy_df, clear_df, rainy_df, sunny_df], axis=0)

# Drop unnecessary columns
weather_df.drop(columns=['Date/Time', 'Weather', 'Temperature'], axis=1, inplace=True)

# Encode categorical variable
label_encoder = LabelEncoder()
weather_df['Std_Weather'] = label_encoder.fit_transform(weather_df['Std_Weather'])

# Standardize features
scaler = StandardScaler()
X = weather_df.drop(['Std_Weather'], axis=1)
X_std = scaler.fit_transform(X)
y = weather_df['Std_Weather']

# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X_std, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
rf_model = RandomForestClassifier(max_features='sqrt', n_estimators=100)
rf_model.fit(x_train, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(x_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))


Accuracy: 0.494991652754591
              precision    recall  f1-score   support

           0       0.49      0.62      0.55       293
           1       0.39      0.30      0.34       323
           2       0.52      0.58      0.55       297
           3       0.57      0.50      0.53       285

    accuracy                           0.49      1198
   macro avg       0.49      0.50      0.49      1198
weighted avg       0.49      0.49      0.49      1198



In [17]:
# Kết xuất
import joblib
model_with_scaler = (rf_model, scaler)
joblib.dump(model_with_scaler, 'weatherPredictor.pkl')

['weatherPredictor.pkl']

In [9]:
# Example usage
weather_predictor = joblib.load("./weatherPredict.pkl", mmap_mode='r')
Humidity = 60
Temperature = 35
input_data = [Humidity, Temperature]
scaled_data = scaler.transform([input_data])
prediction = weather_predictor.predict(scaled_data)
prediction



memmap([2])

In [10]:
if prediction[0] == 0:
    print('CLEAR')
elif prediction[0] == 1:
    print('CLOUDY')
elif prediction[0] == 2:
    print('RAINY')
else:
    print('SUNNY')

RAINY
