**Import dependancies**

In [51]:
import pandas
import numpy
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree, metrics
from sklearn.preprocessing import StandardScaler

**Import the dataset**

In [52]:
dataSet = pandas.read_csv("z-AssignmentTrainingData100.csv")
#print(dataSet)

**Convert non-numerical fields to numerical values**

In [53]:
convert = {True: 1, False: 0}
dataSet['SuccessTF'] = dataSet['SuccessTF'].map(convert)
#print(dataSet)

**Divide the data into features (Attributes), and output (labels)**

In [54]:
features = ['OvertakingSpeedMPS', 'OncomingSpeedMPS', 'InitialSeparationM']
input = dataSet[features]
output = dataSet['SuccessTF']
#print(input)
#print(output)

**SKLEARN has the ability to automtically split dataset into training and testing data**

In [55]:
from sklearn.model_selection import train_test_split
input_train, input_test, output_train, output_test = train_test_split(input, output, test_size=0.2, random_state=0)

**SKLEARN has an ability to scale values so they look nicer on any graphs**

In [56]:
sc = StandardScaler()
input_train = sc.fit_transform(input_train)
input_test = sc.transform(input_test)

**Now create the random forest and train the model**
<br>Estimators are the number of trees in the forest.

In [57]:
rForest = RandomForestRegressor(n_estimators=150, random_state=0)
rForest.fit(input_train, output_train)

**Use the model to test predict new things**

In [58]:
outcome = rForest.predict(input_test)

**Evaluate how good the algorithm is:**
<br>For regression problems you can use metrics to evaluate an algorithm:
- mean absolute error (expect to be within 10% of range of values)
- root mean squared error (expect to be within 10% of average)

In [59]:
mae = metrics.mean_absolute_error(output_test, outcome)
print('Mean Absolute Error:', mae)
maeCheck = (max(output_test)-min(output_test))*0.1
if mae > maeCheck:
    print('\033[91m'+'MAE indicates your algorithm needs improving'+'\033[0m')


evalValue = numpy.sqrt(metrics.mean_squared_error(output_test, outcome))
print('Root Mean Squared Error:', evalValue)
avgOutput = numpy.average(output)
if evalValue > avgOutput*0.1:
    print('\033[91m'+'RSME indicates your algorithm needs improving'+'\033[0m')

Mean Absolute Error: 0.1716666666666667
[91mMAE indicates your algorithm needs improving[0m
Root Mean Squared Error: 0.3153798415315165
[91mRSME indicates your algorithm needs improving[0m


**Now finally use the model to predict an actual result**
<br>Set your prediction data, you can do it manually or use a dataframe to avoid user warnings

In [60]:
predictData = pandas.DataFrame(columns=features, index=[0])
#predictData.loc[0] = pandas.Series({'OvertakingSpeedMPS': 30, 'OncomingSpeedMPS': 20, 'InitialSeparationM': 300})
predictData.loc[0] = pandas.Series({'OvertakingSpeedMPS': 31.6, 'OncomingSpeedMPS': 20.8, 'InitialSeparationM': 157.6})

outcome = rForest.predict(predictData)
print(outcome)

[0.82]




**Round the result in case its not a whole number before convert to normal text**
<br>`def` is short for definition - it is the python way of defining a method/function

In [61]:
def round_half_up(n, decimals=0):
    multiplier = 10 ** decimals
    return numpy.floor(n*multiplier + 0.5) / multiplier

outcome = round_half_up(outcome, decimals=0)
print(outcome)

[1.]


**Now convert to text and give an output**

In [62]:
if outcome == 0:
    print('\033[91m'+'\033[1m'+'I recommend you DO NOT overtake.'+'\033[0m')
elif outcome == 1:
    print('\033[92m'+'\033[1m'+'You can safely overtake.'+'\033[0m')
else:
    print('\033[96m'+'\033[1m'+'The outcome is unknown. Proceed at your own risk.'+'\033[0m')

[92m[1mYou can safely overtake.[0m
