In [None]:
!pip install -q imbalanced-learn scikit-learn pandas numpy matplotlib seaborn


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
from sklearn.metrics import mean_absolute_error, mean_squared_error
from imblearn.over_sampling import SMOTE


In [None]:
# Path to the input CSV that holds the tree-replacement records.
file_path = "/content/large_tree_replacement_data.csv"  # Update with your file path

# Read the whole file into the working DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Schema overview: column names, non-null counts and dtypes.
df.info()
# First five rows as plain text.
print(df.head(5))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Location                     5000 non-null   object 
 1   Trees Lost                   5000 non-null   int64  
 2   Climate                      5000 non-null   object 
 3   Latitude                     5000 non-null   float64
 4   Longitude                    5000 non-null   float64
 5   Recommended Tree             5000 non-null   object 
 6   Trees to Plant               5000 non-null   int64  
 7   Carbon Absorption (kg/year)  5000 non-null   int64  
 8   Average Temperature (°C)     5000 non-null   float64
 9   Average Rainfall (mm/year)   5000 non-null   int64  
dtypes: float64(3), int64(4), object(3)
memory usage: 390.8+ KB
            Location  Trees Lost        Climate  Latitude  Longitude  \
0            Beijing         822    Continental  -18.7657  -1

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how="any")

In [None]:
# Collect the names of all object-dtype (text) columns in the frame.
object_columns = df.select_dtypes(include=['object']).columns
categorical_cols = list(object_columns)
print("Categorical columns detected:", categorical_cols)

Categorical columns detected: ['Location']


In [None]:
# List the text-valued *feature* columns that still need encoding.
# The check runs on `df` minus the two target columns: X_train is only created
# by the train/test split further down, so referencing it here raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
feature_frame = df.drop(columns=["Recommended Tree", "Trees to Plant"])
non_numeric_cols = feature_frame.select_dtypes(include=['object']).columns.tolist()
print("Non-numeric columns:", non_numeric_cols)

Non-numeric columns: ['Location']


In [None]:
# Encode text-valued features as integers on the full frame, *before* the
# feature/target selection and the train/test split. Doing it here means:
#   * the cell works on a fresh kernel (X_train does not exist yet), and
#   * train and test rows share a single category -> integer mapping.
df = df.copy()  # do not write into a frame that may be a view of the raw data

# `le` keeps its original meaning: the fitted encoder for 'Location'.
le = LabelEncoder()
df['Location'] = le.fit_transform(df['Location'].astype(str))

# Encode any remaining object-dtype feature column as well; the target
# columns are deliberately left untouched.
label_encoders = {'Location': le}
for col in df.select_dtypes(include=['object']).columns:
    if col in ("Recommended Tree", "Trees to Plant"):
        continue
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col].astype(str))

In [None]:
# Feature matrix: every column except the two prediction targets.
X = df.drop(columns=["Recommended Tree", "Trees to Plant"])
# Classification target.
y1 = df["Recommended Tree"]
# Regression target. It was previously never defined, although the split and
# the regressor cells both rely on it.
y2 = df["Trees to Plant"]

In [None]:
# Split the features and both targets in a single call so all three arrays are
# partitioned with the same row indices. This replaces two separate calls that
# each overwrote X_train/X_test and stayed aligned only through matching seeds.
(X_train, X_test,
 y1_train, y1_test,
 y2_train, y2_test) = train_test_split(X, y1, y2, test_size=0.2, random_state=42)

In [None]:
# Oversample the classification target on the training split only; the test
# split is not passed to SMOTE. A fixed random_state makes the synthetic
# samples (and therefore every downstream metric) reproducible across runs.
# SMOTE is already imported in the imports cell at the top of the notebook.
smote = SMOTE(random_state=42)
X_train_balanced, y1_train_balanced = smote.fit_resample(X_train, y1_train)

In [None]:
# Learn the standardization statistics from the balanced training features.
scaler = StandardScaler().fit(X_train_balanced)

# Apply the fitted scaler to both splits.
# NOTE: both names are overwritten with their scaled versions, so re-running
# this cell without re-running the split would scale the data a second time.
X_train_balanced = scaler.transform(X_train_balanced)
X_test = scaler.transform(X_test)


In [None]:
# Train a 100-tree random forest on the balanced, scaled training data
# (fit() returns the estimator itself, so construction and fitting are chained).
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_balanced, y1_train_balanced)

# Predicted 'Recommended Tree' label for each test row.
y1_pred = rf_classifier.predict(X_test)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# X_test was replaced by its standardized version in the scaling cell, so the
# regressor must be trained on features in the same (standardized) space.
# Fitting on the raw X_train and predicting on the scaled X_test would apply
# split thresholds learned on one scale to data on another.
X_train_scaled = scaler.transform(X_train)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train_scaled, y2_train)

# Predicted 'Trees to Plant' value for each test row.
y2_pred = rf_regressor.predict(X_test)



In [None]:
# Score the 'Recommended Tree' classifier on the held-out test split.
# Precision, recall and F1 are averaged over classes, weighted by class support.
weighted = {"average": "weighted"}
precision_y1 = precision_score(y1_test, y1_pred, **weighted)
recall_y1 = recall_score(y1_test, y1_pred, **weighted)
f1_y1 = f1_score(y1_test, y1_pred, **weighted)
accuracy_y1 = accuracy_score(y1_test, y1_pred)
conf_matrix_y1 = confusion_matrix(y1_test, y1_pred)

# Report each scalar metric on its own line, then the confusion matrix.
print("Metrics for 'Recommended Tree':")
for label, value in (
    ("Precision", precision_y1),
    ("Recall", recall_y1),
    ("F1-score", f1_y1),
    ("Accuracy", accuracy_y1),
):
    print(f"{label}: {value}")
print(f"Confusion Matrix:\n{conf_matrix_y1}\n")


Metrics for 'Recommended Tree':
Precision: 0.10032819284657311
Recall: 0.101
F1-score: 0.1001393542225218
Accuracy: 0.101
Confusion Matrix:
[[11  4 13 11 12  6 12  9 20  1]
 [11 14 10 13  7 15 11  8  6  5]
 [ 9  8 13 11 10 11  8  7  3 10]
 [10 12 11 10 11  8  9 12  5 14]
 [22 14 13  8 12 17  7  8 12  9]
 [12 12 12 11  9 13  9 11 12 10]
 [11 12  8  8 10  5  5  8  7 10]
 [ 9 12 12  4 12  7  6 13 10 11]
 [ 9 11 11 12  9 10  9  6  5  8]
 [17 14 11 11 11  9  6 11 11  5]]

