In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import pickle
import json

In [2]:
# Load the cleaned dataset from the notebook-relative `dataset` folder.
# Forward slashes resolve on Windows and POSIX alike; a backslash-separated
# path would only resolve on Windows.
df = pd.read_csv('./dataset/cleaned_output_data.csv')

# Display the first few rows to check the data
print(df.head())

   air_temp  humidity                         Land_Type
0      23.9      39.7  Mediterranean Drought Management
1      27.3      46.7                  Urban Cool Roofs
2      28.3      61.3       Tropical Urban Green Spaces
3      25.8      44.9                  Urban Cool Roofs
4      23.2      76.3       Tropical Urban Green Spaces


In [3]:
# Discard every row that has a missing value in any column
df = df.dropna(axis=0, how='any')

In [4]:
# Show the frame as it stands after the dropna() step
df

Unnamed: 0,air_temp,humidity,Land_Type
0,23.9,39.7,Mediterranean Drought Management
1,27.3,46.7,Urban Cool Roofs
2,28.3,61.3,Tropical Urban Green Spaces
3,25.8,44.9,Urban Cool Roofs
4,23.2,76.3,Tropical Urban Green Spaces
...,...,...,...
18259,31.0,85.5,Tropical Storm Surge Defense
18260,33.7,37.3,Urban Cool Roofs
18261,31.9,30.8,Urban Cool Roofs
18262,31.7,49.7,Urban Cool Roofs


In [5]:
# Target: the land-type label column
y = df.loc[:, 'Land_Type']
# Features: the two measurement columns used as model inputs
X = df.loc[:, ['air_temp', 'humidity']]

# Per-column count of missing values still present in the frame
df.isna().sum()

air_temp     0
humidity     0
Land_Type    0
dtype: int64

In [6]:
# Fit the encoder on the target labels, then map each label to its integer code
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [7]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified; with a class as small as 55 rows,
# consider stratify=y -- confirm before changing, as it alters reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)


In [8]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X=X_train, y=y_train)


In [9]:
# Accuracy on the same rows the model was fitted on
y_train_pred = clf.predict(X_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print("Training Accuracy Score:", train_accuracy)

Training Accuracy Score: 1.0


In [10]:
# Predict on the held-out split
y_pred = clf.predict(X_test)

# Evaluate the performance
print("Accuracy Score:", accuracy_score(y_test, y_pred))
# Pin the label set to the encoder's classes. With target_names alone,
# classification_report raises if any class is absent from y_test/y_pred,
# because the number of observed labels no longer matches the names.
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=le.classes_,
        target_names=le.classes_,
    ),
)

Accuracy Score: 0.9998175182481752

Classification Report:
                                    precision    recall  f1-score   support

     Arctic Permafrost Protection       1.00      1.00      1.00       244
          Desert Oasis Management       1.00      0.95      0.98        22
 Mediterranean Drought Management       1.00      1.00      1.00       438
Sub-Arctic Summer Heat Management       1.00      1.00      1.00      1166
    Temperate Wetland Restoration       1.00      1.00      1.00       884
     Tropical Storm Surge Defense       1.00      1.00      1.00       293
      Tropical Urban Green Spaces       1.00      1.00      1.00      1839
                 Urban Cool Roofs       1.00      1.00      1.00       594

                         accuracy                           1.00      5480
                        macro avg       1.00      0.99      1.00      5480
                     weighted avg       1.00      1.00      1.00      5480



In [11]:
from sklearn.model_selection import cross_val_score

# Score the classifier with 5-fold cross-validation over the full data set
cv_scores = cross_val_score(estimator=clf, X=X, y=y_encoded, cv=5)

print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", np.mean(cv_scores))

Cross-Validation Scores: [1.         1.         1.         1.         0.99945235]
Mean Cross-Validation Score: 0.9998904709748084


In [12]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_predict

# Predictions collected from 5-fold cross-validation over the full data set
y_pred_cv = cross_val_predict(estimator=clf, X=X, y=y_encoded, cv=5)

# Confusion matrix plus support-weighted precision / recall / F1
conf_matrix = confusion_matrix(y_encoded, y_pred_cv)
precision, recall, f1 = (
    metric(y_encoded, y_pred_cv, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print("Confusion Matrix:\n", conf_matrix)
print("Precision Score:", precision)
print("Recall Score:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[ 865    0    0    0    0    0    0    0]
 [   0   55    0    0    0    0    0    0]
 [   0    0 1311    0    0    0    0    0]
 [   0    0    0 3821    0    0    0    0]
 [   0    0    0    2 2892    0    0    0]
 [   0    0    0    0    0  903    0    0]
 [   0    0    0    0    0    0 6299    0]
 [   0    0    0    0    0    0    0 2116]]
Precision Score: 0.9998905522502584
Recall Score: 0.9998904949627683
F1 Score: 0.9998904903625262


In [17]:
def predict_land_type(air_temp, humidity):
    """Predict the land-type label for a single (air_temp, humidity) reading.

    Parameters
    ----------
    air_temp : number
        Air temperature; accepted only within [-10, 50].
    humidity : number
        Humidity; accepted only within [0, 100].

    Returns
    -------
    The first element of ``clf.predict`` for the reading, or the string
    'Invalid air temperature value' / 'Invalid humidity value' when the
    corresponding input is outside its accepted range (NaN is treated as
    out of range).
    """
    # Chained comparisons evaluate to False for NaN, so NaN is rejected here
    # rather than slipping past a `x < lo or x > hi` style check.
    if not (-10 <= air_temp <= 50):  # Example reasonable range
        return 'Invalid air temperature value'
    if not (0 <= humidity <= 100):
        return 'Invalid humidity value'

    # The model was fitted on a DataFrame with these column names; passing a
    # frame with the same names avoids scikit-learn's feature-name mismatch
    # warning and ties each value to its column by name, not by position.
    sample = pd.DataFrame([[air_temp, humidity]], columns=['air_temp', 'humidity'])

    # Predict the land type using the trained model
    prediction = clf.predict(sample)
    return prediction[0]

# Try the helper on one in-range reading
air_temp, humidity = 0, 60
print(f"Predicted land type: {predict_land_type(air_temp, humidity)}")


Predicted land type: Arctic Permafrost Protection




In [14]:
# Serialize the fitted classifier to a pickle file in the working directory
with open('atmosphere_model.pickle', mode='wb') as f:
    pickle.dump(clf, file=f)

In [15]:
# Store the lower-cased feature column names, in training order, as JSON
columns = {'data_columns': list(X.columns.str.lower())}
with open("columns.json", "w") as f:
    json.dump(columns, f)