# Forest Cover Type Prediction Model

##Data Load

### File Load

In [1]:
from google.colab import files
uploaded = files.upload()

Saving ForestCoverType.csv to ForestCoverType.csv


### Data Frame Creation

In [3]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['ForestCoverType.csv']))

## Data Preparation

### Spliting Output and Input Variables

In [7]:
y = df["Cover_Type"]
x = df.iloc[:, :-1]

### Checking Class Distributions

In [10]:
from collections import Counter
print("Class Distribution:", Counter(y))

Class Distribution: Counter({5: 2160, 2: 2160, 1: 2160, 7: 2160, 3: 2160, 6: 2160, 4: 2160})


### Splitting Train and Test Data Sets

In [95]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.85, random_state = 42)
print("Class Distribution:", Counter(y_train))
print("Class Distribution:", Counter(y_test))

Class Distribution: Counter({1: 1847, 4: 1841, 6: 1841, 5: 1839, 3: 1835, 2: 1828, 7: 1821})
Class Distribution: Counter({7: 339, 2: 332, 3: 325, 5: 321, 6: 319, 4: 319, 1: 313})


## Model Building

### Decision Tree Classifier

In [190]:
from sklearn.tree import DecisionTreeClassifier
model1 = DecisionTreeClassifier()

In [191]:
from sklearn.ensemble import RandomForestClassifier
model2 = RandomForestClassifier()

In [192]:
from sklearn.neighbors import KNeighborsClassifier
model3 = KNeighborsClassifier()

In [193]:
from sklearn.naive_bayes import GaussianNB
model4 = GaussianNB()

In [194]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
base_models = [("rfc", model1), ("dtc", model2), ("knc", model3), ("gnb", model4)]
meta_learner = LogisticRegression(max_iter = 1000)
stacking_model = StackingClassifier(estimators = base_models, final_estimator = meta_learner)
stacking_model.fit(x_train, y_train)

### Prediction Making

In [198]:
y_pred = stacking_model.predict(x_test)

## Prediction Evaluation

### Evaluation

In [199]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average = "weighted")
rec = recall_score(y_test, y_pred, average = "weighted")
f1 = f1_score(y_test, y_pred, average = "weighted")
print("Accuracy Score: ","{:.2%}".format(acc))
print("Precision Score: ","{:.2%}".format(prec))
print("Recall Score: ","{:.2%}".format(rec))
print("F1 Score: ","{:.2%}".format(f1))

Accuracy Score:  90.17%
Precision Score:  90.02%
Recall Score:  90.17%
F1 Score:  90.04%
