##### This template is based on the learning instructions from the course "Machine Learning A-Z: AI, Python & R + ChatGPT Prize [2025]"

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd

### Importing the dataset

In [2]:
# Read the CSV and split it positionally: the last column is the target,
# every column to its left is a feature.
dataset = pd.read_csv('fibromyalgia_data.csv')
feature_frame, target_series = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X, y = feature_frame.values, target_series.values

### Dealing with missing data: 2 methods

In [3]:
# Method 1: drop every row that has at least one missing value.
# dropna returns a new frame; it is only displayed here and not assigned,
# so `dataset` itself is left unchanged by this cell.
dataset.dropna(axis=0, how='any')

Unnamed: 0,Lower neck in front,Edge of upper breast,Arm near the elbow,Knee,Base of the skull in the back of the head,Hip bone,Upper outer buttock,Back of the neck,Back of the shoulders,"Fibromyalgia probability, %"
0,8,8,8,8,8.0,8,8,8,8,100
1,7,7,7,7,7.0,7,7,7,7,100
2,6,6,6,6,6.0,6,6,6,6,80
3,5,5,5,5,5.0,5,5,5,5,60
4,4,4,4,4,4.0,4,4,4,4,50
...,...,...,...,...,...,...,...,...,...,...
2283,1,1,1,1,2.0,1,2,1,1,1
2284,1,1,1,1,2.0,1,1,2,1,10
2285,1,1,1,1,1.0,2,2,1,1,6
2286,1,1,1,1,1.0,2,2,2,1,20


In [4]:
from sklearn.impute import SimpleImputer

# Method 2: keep every row and replace each NaN with the mean of its column.
# X contains only feature columns (the target was split off when the dataset
# was loaded), so the whole matrix is imputed instead of a hard-coded `0:10`
# column slice that had to be kept in sync with the number of features.
# NOTE(review): the column means are learned from all rows, i.e. before the
# train/test split below, so held-out rows influence the imputed values.
# Consider fitting the imputer on the training split only.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imputer.fit_transform(X)

### Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Preview the training features returned by the split (not yet scaled when cells run in order).
print(X_train)

[[9. 2. 9. ... 9. 9. 9.]
 [1. 1. 1. ... 1. 3. 3.]
 [5. 5. 5. ... 5. 5. 5.]
 ...
 [8. 8. 8. ... 6. 6. 6.]
 [5. 5. 5. ... 5. 5. 4.]
 [6. 1. 1. ... 1. 1. 1.]]


In [7]:
# Preview the training targets returned by the split alongside X_train.
print(y_train)

[70 25 70 ... 70 60  6]


In [8]:
# Preview the held-out test features (not yet scaled when cells run in order).
print(X_test)

[[6. 6. 1. ... 6. 6. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [8. 8. 8. ... 8. 8. 8.]
 ...
 [5. 5. 5. ... 4. 4. 4.]
 [9. 9. 9. ... 9. 1. 1.]
 [6. 6. 6. ... 4. 6. 6.]]


In [9]:
# Preview the held-out test targets returned by the split alongside X_test.
print(y_test)

[ 80   1  95  40  95  80  45  70  10   1  95  75 100  40 100  50 100 100
  70 100 100  70  60  60  75  80  40  80 100  40  70  80  85   6  75  80
 100  45   1  80  80  95  65  60  85  80  30  65  50  15  95  85  55  75
 100  80  75  75  70  15  95  15 100 100 100  40  60 100  75  80   1  65
  75  60  75   6  50  60  75  70  10  85  70  75  70  70  60  95   1   6
  70   6 100  65  45  10  70  85  75 100   6  65  65  80 100  50  50  95
  40  40 100  60  40  75  90  80  70  70 100 100  70 100 100  10  80   1
  95  80  80  75  85   6 100  40  70 100 100  75 100  75  70  95  20  80
  75 100  80   6  70  80  95   6 100 100  70  75 100  30  10  95 100  75
  70  80  95  45  70  70  60  75  65  60 100  80  20  70  75  65  85  65
  95  75  15  75  20  80  70  80  20  70  70  80  75  30  85  80   6 100
  60  80 100 100  75 100 100 100  65  40  40 100   6  85  60  85  60 100
  50  20   6  70  30 100  70  95  75  75  20  80  30  70  80  65  75  50
  75  80  95  95  75  30  30  20 100  80  70  30  7

### Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same transformation to both splits so the test rows never
# contribute to the scaling statistics.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [11]:
# Check the standardized training features; X_train was overwritten by the scaling cell above.
print(X_train)

[[ 1.1565945  -1.19797633  1.29893231 ...  1.66095395  1.46191985
   1.37922899]
 [-2.010735   -1.58640594 -1.80324861 ... -1.52139991 -0.87664075
  -0.96503329]
 [-0.42707025 -0.03268752 -0.25215815 ...  0.06977702 -0.09712055
  -0.18361253]
 ...
 [ 0.76067832  1.1326013   0.9111597  ...  0.46757125  0.29263955
   0.20709785]
 [-0.42707025 -0.03268752 -0.25215815 ...  0.06977702 -0.09712055
  -0.57432291]
 [-0.03115406 -1.58640594 -1.80324861 ... -1.52139991 -1.65616095
  -1.74645405]]


In [12]:
# Check the test features after applying the scaler fitted on the training rows.
print(X_test)

[[-0.03115406  0.35574209 -1.80324861 ...  0.46757125  0.29263955
  -1.74645405]
 [-2.010735   -1.58640594 -1.80324861 ... -1.52139991 -1.65616095
  -1.74645405]
 [ 0.76067832  1.1326013   0.9111597  ...  1.26315972  1.07215975
   0.98851861]
 ...
 [-0.42707025 -0.03268752 -0.25215815 ... -0.32801721 -0.48688065
  -0.57432291]
 [ 1.1565945   1.5210309   1.29893231 ...  1.66095395 -1.65616095
  -1.74645405]
 [-0.03115406  0.35574209  0.13561447 ... -0.32801721  0.29263955
   0.20709785]]


### Training the Random Forest Classification model

In [13]:
from sklearn.ensemble import RandomForestClassifier

# 100 trees split on information gain (entropy); the seed fixes the forest's
# randomness so repeated runs build the same model.
classifier = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    random_state=0,
)
classifier.fit(X_train, y_train)

### Predicting a new result

In [14]:
# Score one hand-entered observation of nine feature values. It goes through
# the fitted scaler `sc` first because the classifier was trained on scaled features.
sample = [[2, 2, 2, 4, 7, 2, 2, 7, 7]]
sample_scaled = sc.transform(sample)
print(classifier.predict(sample_scaled))

[95]


In [15]:
# Score a second hand-entered observation, scaled with the fitted scaler `sc`
# before prediction because the classifier was trained on scaled features.
# NOTE(review): the rows displayed earlier in this notebook only show scores
# of 1 or more; confirm that 0 is a valid input for these features.
sample = [[0, 0, 0, 0, 3, 0, 0, 3, 3]]
sample_scaled = sc.transform(sample)
print(classifier.predict(sample_scaled))

[25]


### Predicting the Test set results

In [16]:
# Predict a label for every row of the scaled test set; compared against y_test in the cells below.
y_pred = classifier.predict(X_test)

In [17]:
# Show predictions next to the true labels: column 0 is y_pred, column 1 is y_test.
pred_vs_actual = np.column_stack((y_pred, y_test))
print(pred_vs_actual)

[[ 80  80]
 [  6   1]
 [ 85  95]
 [ 40  40]
 [ 95  95]
 [ 80  80]
 [ 70  45]
 [ 70  70]
 [ 10  10]
 [  6   1]
 [ 95  95]
 [ 75  75]
 [100 100]
 [ 60  40]
 [100 100]
 [ 50  50]
 [100 100]
 [100 100]
 [ 75  70]
 [ 80 100]
 [100 100]
 [ 70  70]
 [ 65  60]
 [ 55  60]
 [ 75  75]
 [ 80  80]
 [ 30  40]
 [ 80  80]
 [100 100]
 [ 40  40]
 [ 20  70]
 [ 75  80]
 [ 85  85]
 [  6   6]
 [ 75  75]
 [ 75  80]
 [100 100]
 [ 25  45]
 [  6   1]
 [ 80  80]
 [ 80  80]
 [ 95  95]
 [ 65  65]
 [ 40  60]
 [ 85  85]
 [100  80]
 [ 30  30]
 [ 65  65]
 [ 55  50]
 [  6  15]
 [ 95  95]
 [ 85  85]
 [ 45  55]
 [ 75  75]
 [ 75 100]
 [ 80  80]
 [ 75  75]
 [ 90  75]
 [ 70  70]
 [ 30  15]
 [ 95  95]
 [ 10  15]
 [100 100]
 [100 100]
 [100 100]
 [ 40  40]
 [ 75  60]
 [100 100]
 [ 50  75]
 [ 80  80]
 [  6   1]
 [ 65  65]
 [ 75  75]
 [ 40  60]
 [ 80  75]
 [  6   6]
 [ 55  50]
 [ 65  60]
 [ 75  75]
 [ 70  70]
 [ 10  10]
 [ 85  85]
 [ 70  70]
 [ 80  75]
 [ 70  70]
 [ 65  70]
 [ 60  60]
 [ 95  95]
 [  6   1]
 [ 25   6]
 [ 70  70]

### Making the Confusion Matrix

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
accuracy_score(y_test, y_pred)

0.6703056768558951

In [19]:
# Show the raw confusion-matrix counts; with confusion_matrix(y_test, y_pred), rows are true labels and columns are predicted labels.
print(cm)

[[ 4  8  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 3 12  0  3  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  1 12  1  2  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0]
 [ 0  3  1  3  1  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  9  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0  1  0  0  1  1  0  0  0  0  0  0  0  0]
 [ 0  0  0  2  2  0  7  0  0  0  1  1  0  1  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  1  0  8  0  0  0  3  2  2  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0  0  1  1  0  2  0  1  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  1  0  0  0  4  3  3  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  1  0  1  0  1  1  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  3  0  1  2  9  6  0  1  0  0  0  1  0]
 [ 0  0  0  0  0  0  0  0  1  0  0  0  3 19  1  0  1  0  0  0  0]
 [ 0  0  0  0  1  0  0  0  1  1  0  0  0  3 38  5  0  0  0  0  0]
 [ 0  0  0