# Forest cover classification

There are 7 diferent types of covers into which the data will be classified:
* Spruce/Fir
* Lodgepole Pine
* Ponderosa Pine
* Cottonwood/Willow
* Aspen
* Douglas-fir
* Krummholz

The model uses a Sequential arquitecture.

In [11]:
import tensorflow as tf
from tensorflow import keras

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer

import matplotlib as plt
import numpy
import pandas as pd

***Import and preprocess the data***

The last data point of each line of the .csv is the type of cover the line represents.

In [12]:
data = pd.read_csv('cover_data.csv')
print(data.info())

#Extract the last column from the data pandas dataframe, which is the label
cover_type = data["class"]
#Extract from the first to penuultimate column from the data pandas dataframe
caracteristics = data.iloc[:,0:-1]

#Perform one-hot encoding
caracteristics = pd.get_dummies(caracteristics)

#Split the data
x_train, x_test, y_train, y_test = train_test_split(caracteristics, cover_type, train_size=30, random_state=0)

#Enconde and transform data with labels
#Only the numeric columns are scaled
numeric_columns = ['Elevation','Aspect','Slope','Horizontal_Distance_To_Hydrology','Vertical_Distance_To_Hydrology','Horizontal_Distance_To_Roadways','Hillshade_9am','Hillshade_Noon','Hillshade_3pm','Horizontal_Distance_To_Fire_Points']
cp = ColumnTransformer([("numeric", StandardScaler(), numeric_columns)])
x_train = cp.fit_transform(x_train)
x_test = cp.transform(x_test)

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 581012 entries, 0 to 581011
Data columns (total 55 columns):
 #   Column                              Non-Null Count   Dtype
---  ------                              --------------   -----
 0   Elevation                           581012 non-null  int64
 1   Aspect                              581012 non-null  int64
 2   Slope                               581012 non-null  int64
 3   Horizontal_Distance_To_Hydrology    581012 non-null  int64
 4   Vertical_Distance_To_Hydrology      581012 non-null  int64
 5   Horizontal_Distance_To_Roadways     581012 non-null  int64
 6   Hillshade_9am                       581012 non-null  int64
 7   Hillshade_Noon                      581012 non-null  int64
 8   Hillshade_3pm                       581012 non-null  int64
 9   Horizontal_Distance_To_Fire_Points  581012 non-null  int64
 10  Wilderness_Area1                    581012 non-null  int64
 11  Wilderness_Area2                    581012 non-null 

ValueError: y contains previously unseen labels: [4, 5]

***Create the model***

In [None]:
#Function to create the model
def create_model(x_train):
    model = keras.models.Sequential()

    #Input layer
    model.add(keras.layers.InputLayer(input_shape=(x_train.shape[1])))