## Importing Libraries

In [1]:
# Pandas is used for data manipulation
import pandas as pd

# Use numpy to convert to arrays
import numpy as np

# Import tools needed for visualization
from sklearn.tree import export_graphviz
import pydot
import matplotlib.pyplot as plt
%matplotlib inline

## Data Exploration

In [2]:
# Load the frequency measurements from CSV into a DataFrame
csv_path = '../content/Frequency measurement_1.csv'
df = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows (head() returns 5 rows by default)
df.head()

Unnamed: 0,a(cm),h(cm),permitivity,ratio,Measured_frequency
0,6.8,0.08,2.32,0.003392,835
1,6.8,0.159,2.32,0.006692,829
2,6.8,0.318,2.32,0.013159,815
3,5.0,0.159,2.32,0.009106,1128
4,3.8,0.1524,2.49,0.011567,1443


In [4]:
# (rows, columns) of the whole dataset; the Measured_frequency column that is
# later used as the label is still part of df at this point
df.shape

(20, 5)

In [5]:
# List the column labels of df (the input columns plus Measured_frequency,
# which is split off as the label further down)
df.columns

Index(['a(cm)', 'h(cm)', 'permitivity', 'ratio', 'Measured_frequency'], dtype='object')

In [6]:
# Count the missing values in each column (isna is the same check as isnull)
df.isna().sum()

a(cm)                 0
h(cm)                 0
permitivity           0
ratio                 0
Measured_frequency    0
dtype: int64

There are no null values in any column, so no imputation is needed.

## One-Hot Encoding

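One-hot encoding replaces each categorical column with a set of binary indicator columns so that a model can use it as numeric input. Every column in this dataset is already numeric, so the step below leaves the data unchanged (the shape stays `(20, 5)`).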

In [18]:
# Expand any categorical (object/category) columns into indicator columns;
# numeric columns are passed through get_dummies unchanged
df = pd.get_dummies(data=df)
df.head()

Unnamed: 0,a(cm),h(cm),permitivity,ratio
0,6.8,0.08,2.32,0.003392
1,6.8,0.159,2.32,0.006692
2,6.8,0.318,2.32,0.013159
3,5.0,0.159,2.32,0.009106
4,3.8,0.1524,2.49,0.011567


In [8]:
# Show the frame's dimensions once the encoding step has run
encoded_shape = df.shape
print('Shape of features after one-hot encoding:', encoded_shape)

Shape of features after one-hot encoding: (20, 5)


## Features and Labels

In [9]:
# Name of the column holding the values we want to predict
TARGET_COLUMN = 'Measured_frequency'

# Split the target off only while it is still part of df. The guard makes this
# cell safe to re-run: without it, a second execution raises KeyError because
# the first run already removed the column from df.
if TARGET_COLUMN in df.columns:
    # Labels are the values we want to predict
    labels = df[TARGET_COLUMN]

    # Remove the labels from the features (df is the feature frame from here on)
    df = df.drop(columns=TARGET_COLUMN)
elif 'labels' not in globals():
    # The column is missing and no labels were extracted by an earlier run:
    # fail loudly with the same exception type the unguarded lookup raised
    raise KeyError(TARGET_COLUMN)

# Saving feature names for later use
feature_list = list(df.columns)

## Train Test Split

In [10]:
# Scikit-learn helper that partitions data into training and testing subsets
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
train_features, test_features, train_labels, test_labels = train_test_split(
    df, labels, test_size=0.20, random_state=42
)

In [11]:
# Report the dimensions of each piece of the train/test split
for caption, part in (
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_labels),
    ('Testing Features Shape:', test_features),
    ('Testing Labels Shape:', test_labels),
):
    print(caption, part.shape)

Training Features Shape: (16, 4)
Training Labels Shape: (16,)
Testing Features Shape: (4, 4)
Testing Labels Shape: (4,)


## Training the Forest

In [12]:
# Random forest regressor from scikit-learn
from sklearn.ensemble import RandomForestRegressor

# Build a forest of 1000 trees; the fixed seed makes training repeatable
rf = RandomForestRegressor(random_state=42, n_estimators=1000)

# Fit the forest on the training split (the trailing ';' hides the estimator repr)
rf.fit(X=train_features, y=train_labels);

## Make Predictions on Test Data

In [14]:
# Predict the test targets with the fitted forest
predictions = rf.predict(test_features)

# Absolute deviation between each prediction and its measured value
errors = (predictions - test_labels).abs()

# Mean absolute error (MAE), rounded to two decimals for display
# NOTE(review): the 'Hz' suffix is hard-coded and the unit of Measured_frequency
# is not stated anywhere in this notebook -- confirm it against the data source
mae = round(np.mean(errors), 2)
print('Mean Absolute Error:', mae, 'Hz')


Mean Absolute Error: 280.04 Hz


In [15]:
# Absolute percentage error of every test sample; its mean (taken below) is the MAPE
mape = (errors / test_labels) * 100

# "Accuracy" is reported as 100 minus the mean absolute percentage error
accuracy = 100 - np.mean(mape)
accuracy_rounded = round(accuracy, 2)
print('Accuracy:', accuracy_rounded, '%.')

Accuracy: 85.18 %.


## Visualizing a Single Decision Tree

In [16]:
# Select a single fitted estimator (index 5) from the forest
tree = rf.estimators_[5]

# Dump that tree in Graphviz DOT format, labelling the splits with the feature names
export_graphviz(
    tree,
    out_file='tree.dot',
    feature_names=feature_list,
    rounded=True,
    precision=1,
)

# Parse the DOT file; exactly one graph is expected, so unpack it directly
[graph] = pydot.graph_from_dot_file('tree.dot')

# Render the graph to a PNG image (the trailing ';' hides the return value)
graph.write_png('tree.png');

![Decision Tree](tree.png)

In [17]:
# Depth of the tree selected above, read from its fitted tree structure
tree_depth = tree.tree_.max_depth
print('The depth of this tree is:', tree_depth)

The depth of this tree is: 5


Smaller tree for visualization.

![Small Decision Tree](small_tree.PNG)

If you find this notebook useful, please consider **upvoting**.

**Thank you!**