**Import dependencies**

In [181]:
import numpy
import pandas
from sklearn import tree, metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

**Import the dataset**

In [182]:
# Read the symptom/advice table; the path is relative to the working directory.
dataSet = pandas.read_csv(filepath_or_buffer="RandomForest-CSV.csv")

# Echo the loaded frame as a quick sanity check.
print(dataSet)

     OutputAdvice  Headache  SoreThroat  Coughing  Tired
0      Painkiller         1           0         0      0
1             Flu         1           1         1      0
2  GlandularFever         1           0         0      1
3      Tonsilitis         0           1         0      1
4          Stress         0           0         0      1


**Convert non-numerical fields to numerical values**

In [183]:
# Label encoding: every advice string is given an integer code for the model.
convert = {'Painkiller': 0, 'Flu': 1, 'GlandularFever': 2, 'Tonsilitis': 3, 'Stress': 4}

# Translate labels to codes. Values that are not keys of `convert` pass through
# unchanged, so re-running this cell on an already-encoded column is a no-op
# (a plain `.map(convert)` would turn every value into NaN on the second run).
encodedAdvice = dataSet['OutputAdvice'].map(lambda label: convert.get(label, label))

# Fail here, with a readable message, if anything is left that is not a known
# code (e.g. a misspelt or missing label in the CSV) instead of passing it on.
unknownLabels = set(encodedAdvice.unique()) - set(convert.values())
if unknownLabels:
    raise ValueError(
        'OutputAdvice contains values with no entry in convert: '
        + str(sorted(unknownLabels, key=str))
    )

dataSet['OutputAdvice'] = encodedAdvice
print(dataSet)

   OutputAdvice  Headache  SoreThroat  Coughing  Tired
0             0         1           0         0      0
1             1         1           1         1      0
2             2         1           0         0      1
3             3         0           1         0      1
4             4         0           0         0      1


**Divide the data into features (Attributes), and output (labels)**

In [184]:
# Columns used as model inputs; OutputAdvice is the column to be predicted.
features = [
    'Headache',
    'SoreThroat',
    'Coughing',
    'Tired',
]

# NOTE(review): `input` shadows Python's built-in input(); the name is kept
# because later cells refer to it.
input = dataSet[features]
output = dataSet['OutputAdvice']

# Show the feature table followed by the label column.
print(input, output, sep='\n')

   Headache  SoreThroat  Coughing  Tired
0         1           0         0      0
1         1           1         1      0
2         1           0         0      1
3         0           1         0      1
4         0           0         0      1
0    0
1    1
2    2
3    3
4    4
Name: OutputAdvice, dtype: int64


**SKLEARN has the ability to automatically split a dataset into training and testing data**

In [185]:
# train_test_split is imported in the notebook's top import cell.
# Hold back 20% of the rows for testing; the fixed random_state makes the
# split repeatable from one run to the next.
input_train, input_test, output_train, output_test = train_test_split(
    input,
    output,
    test_size=0.2,
    random_state=0,
)

**SKLEARN has the ability to scale (standardise) values so that all features are on a comparable scale and look nicer on any graphs**

`from sklearn.preprocessing import StandardScaler`
<br>`sc = StandardScaler()`
<br>`input_train = sc.fit_transform(input_train)`
<br>`input_test = sc.transform(input_test)`

**Now create the random forest and train the model**
<br>Estimators are the number of trees in the forest.

In [186]:
# NOTE(review): OutputAdvice holds category codes rather than quantities;
# confirm that a regressor (not a classifier) is really what is wanted here.
rForest = RandomForestRegressor(
    n_estimators=15,  # number of trees in the forest
    random_state=0,   # fixed seed
)

# Train on the training portion of the split.
rForest.fit(X=input_train, y=output_train)

**Use the model to predict the outputs for the test data**

In [187]:
# Predictions for the held-out test rows.
outcome = rForest.predict(X=input_test)

**Evaluate how good the algorithm is:**
<br>For regression problems the metrics used to evaluate an algorithm are mean absolute error, mean squared error, and root mean squared error. If the root mean squared error is more than 10% of the average of the output field this may indicate that you haven't used enough estimators (trees).

In [188]:
# Score the test-set predictions. The MSE is computed once and reused for the RMSE.
mae = metrics.mean_absolute_error(output_test, outcome)
print('Mean Absolute Error:', mae)

mse = metrics.mean_squared_error(output_test, outcome)
print('Mean Squared Error:', mse)

evalValue = numpy.sqrt(mse)  # root mean squared error
print('Root Mean Squared Error:', evalValue)

# Rule of thumb: warn when the RMSE is more than 10% of the mean of the
# output column. The escape codes wrap the message in an ANSI colour.
avgOutput = numpy.average(output)
tolerance = avgOutput*0.1

if evalValue > tolerance:
    print('\033[91m'+'Your algorithm needs improving'+'\033[0m')

Mean Absolute Error: 0.1333333333333333
Mean Squared Error: 0.01777777777777777
Root Mean Squared Error: 0.1333333333333333


**Now finally use the model to predict an actual result**
<br>Set your prediction data. You can do it manually, or use a DataFrame to avoid user warnings.

In [189]:
# The single case to diagnose, keyed by feature name.
symptoms = {'Headache': 1, 'SoreThroat': 0, 'Coughing': 0, 'Tired': 1}

# Build the one-row frame in a single step. This gives numeric columns straight
# away (creating an empty frame and then assigning a row leaves them as object
# dtype), and `columns=features` pins the column order to the training order.
predictData = pandas.DataFrame([symptoms], columns=features)

outcome = rForest.predict(predictData)
print(outcome)

[2.13333333]


**Round the result, in case it's not a whole number, before converting it to normal text**
<br>`def` is short for definition - it is the python way of defining a method/function

In [190]:
def round_half_up(n, decimals=0):
    """Round ``n`` to ``decimals`` decimal places, with ties going upward.

    Unlike Python's built-in ``round`` (which sends ties to the nearest even
    digit), a value exactly halfway between two candidates is rounded toward
    positive infinity. Because the work is done by ``numpy.floor``, ``n`` may
    be a scalar or a numpy array; arrays are rounded element by element.

    Parameters:
        n: number or numpy array to round.
        decimals: how many decimal places to keep (default 0).

    Returns:
        The rounded value(s) as numpy floating-point data.
    """
    scale = 10 ** decimals
    # Shift the wanted digit into the units place, add one half, then drop
    # the fractional part.
    shifted = n * scale + 0.5
    return numpy.floor(shifted) / scale

# `decimals` is left at its default of 0, so the prediction is rounded to a
# whole number.
outcome = round_half_up(outcome)
print(outcome)

[2.]


**Now convert to text and give an output**

In [191]:
# Translate the rounded code back into its advice label using the same
# `convert` table that encoded the labels, so the two directions cannot
# drift apart.
for label, code in convert.items():
    if outcome == code:
        outcome = label
        break
else:
    # Nothing matched. A string means the cell has already been run and
    # `outcome` is already a label; anything else is a code with no label.
    if not isinstance(outcome, str):
        raise ValueError('No advice label is defined for predicted value: ' + str(outcome))

print('The doctors diagnosis is: '+outcome)

The doctors diagnosis is: GlandularFever
