<a href="https://colab.research.google.com/github/Delsa2001/DSGP/blob/Deshan/Model%20updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Step 2: Read the two per-city weather tables from CSV
galle_weather = pd.read_csv('galle_weather_updated.csv')
colombo_weather = pd.read_csv('colombo_weather_updated.csv')

# Step 3: Tag every row with its city so the origin survives the merge
galle_weather = galle_weather.assign(Location='Galle')
colombo_weather = colombo_weather.assign(Location='Colombo')

# The encoder is kept as a notebook-level name so the integer labels can be
# mapped back to the original plant-type names later (label_encoder.classes_).
label_encoder = LabelEncoder()

# Single preprocessing pipeline:
#   1. stack both cities into one frame with a fresh 0..n-1 index
#   2. parse 'Date' as YYYYMMDD and then drop it -- the parsed values are not
#      used afterwards, so the column never reaches the model as a feature
#   3. replace the 'Suitable_Plant_Type' target with integer class codes
#   4. one-hot encode 'Location'; drop_first=True drops the first category's
#      indicator column, so two cities yield a single column
combined_weather = (
    pd.concat([galle_weather, colombo_weather], ignore_index=True)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y%m%d'))
    .drop(columns=['Date'])
    .assign(
        Suitable_Plant_Type=lambda frame: label_encoder.fit_transform(
            frame['Suitable_Plant_Type']
        )
    )
    .pipe(pd.get_dummies, columns=['Location'], drop_first=True)
)

# Step 4: Separate the encoded target (y) from the predictor columns (X)
target_column = 'Suitable_Plant_Type'
y = combined_weather[target_column]
X = combined_weather.drop(columns=[target_column])

# Hold out 20% of the rows for testing; stratify=y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Step 5: Fit a 100-tree random forest on the training partition
# (fit() returns the estimator itself, so construction and fitting can be chained)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 6: Predict labels for the held-out test rows
y_pred = rf_model.predict(X_test)

# Step 7: Score the predictions against the true test labels
# NOTE(review): the recorded run of this cell reports accuracy 1.0, and the
# recorded feature importances are dominated by the location indicator --
# the target may be largely determined by location; confirm this is intended
# and not label leakage.
confusion_mat = confusion_matrix(y_test, y_pred)
classification_report_output = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Print each result under its heading (heading and value are separated by
# print's default single space)
for heading, result in (
    ("Accuracy:", accuracy),
    ("\nClassification Report:\n", classification_report_output),
    ("\nConfusion Matrix:\n", confusion_mat),
):
    print(heading, result)


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       366
           1       1.00      1.00      1.00       365

    accuracy                           1.00       731
   macro avg       1.00      1.00      1.00       731
weighted avg       1.00      1.00      1.00       731


Confusion Matrix:
 [[366   0]
 [  0 365]]


In [2]:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy of the random-forest configuration over the
# full feature matrix and target (not just the training partition).
scores = cross_val_score(
    estimator=rf_model,
    X=X,
    y=y,
    scoring='accuracy',
    cv=5,
)

# Report the per-fold accuracies followed by their average
mean_accuracy = scores.mean()
print("Cross-Validation Accuracy Scores:", scores)
print("Mean Accuracy:", mean_accuracy)


Cross-Validation Accuracy Scores: [1. 1. 1. 1. 1.]
Mean Accuracy: 1.0


In [3]:
# Importance score the fitted forest assigns to each feature, paired with the
# column names of X in column order.
feature_importances = rf_model.feature_importances_

# One "<column>: <importance>" line per feature, importance shown to 4 decimals
for line in (
    f"{column}: {weight:.4f}"
    for column, weight in zip(X.columns, feature_importances)
):
    print(line)



Temperature: 0.1480
Precipitation: 0.0040
Humidity: 0.0241
Location_Galle: 0.8239


In [4]:
# Ablation: retrain without the location indicator to see how much of the
# score depends on it.
X_no_location = X.drop(columns=['Location_Galle'])

# Same split settings (test_size, random_state, stratify) as the
# full-feature run, applied to the reduced feature matrix.
X_train_no_loc, X_test_no_loc, y_train_no_loc, y_test_no_loc = train_test_split(
    X_no_location,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Same forest configuration as the full-feature model
rf_model_no_loc = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_no_loc, y_train_no_loc
)
y_pred_no_loc = rf_model_no_loc.predict(X_test_no_loc)

# Report accuracy and the per-class breakdown for the reduced model
accuracy_no_loc = accuracy_score(y_test_no_loc, y_pred_no_loc)
report_no_loc = classification_report(y_test_no_loc, y_pred_no_loc)
print("Accuracy without Location:", accuracy_no_loc)
print("\nClassification Report:\n", report_no_loc)


Accuracy without Location: 0.7756497948016415

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.79      0.78       366
           1       0.78      0.76      0.77       365

    accuracy                           0.78       731
   macro avg       0.78      0.78      0.78       731
weighted avg       0.78      0.78      0.78       731

