In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels
import matplotlib.pyplot as plt
import math

# Load the EEG psychiatry dataset into the working DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
DATA_PATH = "eeg_psychiatry_data.csv"
df = pd.read_csv(DATA_PATH)

In [17]:
# Preview the first five rows as a quick sanity check of the load.
print(df.head(n=5))

   no. sex   age    eeg.date  education     IQ       main.disorder  \
0    1   M  57.0   2012.8.30        NaN    NaN  Addictive disorder   
1    2   M  37.0    2012.9.6        6.0  120.0  Addictive disorder   
2    3   M  32.0   2012.9.10       16.0  113.0  Addictive disorder   
3    4   M  35.0   2012.10.8       18.0  126.0  Addictive disorder   
4    5   M  36.0  2012.10.18       16.0  112.0  Addictive disorder   

      specific.disorder  AB.A.delta.a.FP1  AB.A.delta.b.FP2  ...  \
0  Alcohol use disorder         35.998557         21.717375  ...   
1  Alcohol use disorder         13.425118         11.002916  ...   
2  Alcohol use disorder         29.941780         27.544684  ...   
3  Alcohol use disorder         21.496226         21.846832  ...   
4  Alcohol use disorder         37.775667         33.607679  ...   

   COH.F.gamma.o.Pz.p.P4  COH.F.gamma.o.Pz.q.T6  COH.F.gamma.o.Pz.r.O1  \
0              55.989192              16.739679              23.452271   
1              45.5956

In [18]:
# Show the column labels; pandas abbreviates the middle of a long index.
column_labels = df.columns
print(column_labels)

Index(['no.', 'sex', 'age', 'eeg.date', 'education', 'IQ', 'main.disorder',
       'specific.disorder', 'AB.A.delta.a.FP1', 'AB.A.delta.b.FP2',
       ...
       'COH.F.gamma.o.Pz.p.P4', 'COH.F.gamma.o.Pz.q.T6',
       'COH.F.gamma.o.Pz.r.O1', 'COH.F.gamma.o.Pz.s.O2',
       'COH.F.gamma.p.P4.q.T6', 'COH.F.gamma.p.P4.r.O1',
       'COH.F.gamma.p.P4.s.O2', 'COH.F.gamma.q.T6.r.O1',
       'COH.F.gamma.q.T6.s.O2', 'COH.F.gamma.r.O1.s.O2'],
      dtype='object', length=1149)


In [19]:
# Split the dotted 'eeg.date' string into separate year, month and day columns.
# A value such as "2012.8.30" is split on the literal '.' into ["2012", "8", "30"].
string_split = df['eeg.date'].str.split('.')

# Store each component as a number rather than as text, so the new columns sort
# and compare numerically (as strings, month "10" would sort before month "9").
# errors='coerce' turns a missing or malformed component into NaN instead of
# raising, which matches the NaN the plain string lookup would have produced.
for position, part_name in enumerate(('year', 'month', 'day')):
    df[part_name] = pd.to_numeric(string_split.str.get(position), errors='coerce')




In [20]:
# Drop the raw 'eeg.date' column from the frame, then preview the first rows.
df = df.drop(columns=['eeg.date'])
print(df.head(n=5))

   no. sex   age  education     IQ       main.disorder     specific.disorder  \
0    1   M  57.0        NaN    NaN  Addictive disorder  Alcohol use disorder   
1    2   M  37.0        6.0  120.0  Addictive disorder  Alcohol use disorder   
2    3   M  32.0       16.0  113.0  Addictive disorder  Alcohol use disorder   
3    4   M  35.0       18.0  126.0  Addictive disorder  Alcohol use disorder   
4    5   M  36.0       16.0  112.0  Addictive disorder  Alcohol use disorder   

   AB.A.delta.a.FP1  AB.A.delta.b.FP2  AB.A.delta.c.F7  ...  \
0         35.998557         21.717375        21.518280  ...   
1         13.425118         11.002916        11.942516  ...   
2         29.941780         27.544684        17.150159  ...   
3         21.496226         21.846832        17.364316  ...   
4         37.775667         33.607679        21.865556  ...   

   COH.F.gamma.o.Pz.s.O2  COH.F.gamma.p.P4.q.T6  COH.F.gamma.p.P4.r.O1  \
0              45.678820              30.167520              16.91

In [29]:
# Shorten both diagnosis columns to the first word of each label, which drops
# the trailing "disorder" (e.g. "Addictive disorder" -> "Addictive").
# NOTE(review): everything after the first space is discarded, not just the
# word "disorder" -- "Alcohol use disorder" becomes "Alcohol". Confirm that
# this loss of detail is intended.
split_main = df['main.disorder'].str.split(' ')
split_spec = df['specific.disorder'].str.split(' ')

# Keep only the leading token of each split label.
df['main.disorder'] = split_main.str[0]
df['specific.disorder'] = split_spec.str[0]

print(df.head(n=5))


   no. sex   age  education     IQ main.disorder specific.disorder  \
0    1   M  57.0        0.0    0.0     Addictive           Alcohol   
1    2   M  37.0        6.0  120.0     Addictive           Alcohol   
2    3   M  32.0       16.0  113.0     Addictive           Alcohol   
3    4   M  35.0       18.0  126.0     Addictive           Alcohol   
4    5   M  36.0       16.0  112.0     Addictive           Alcohol   

   AB.A.delta.a.FP1  AB.A.delta.b.FP2  AB.A.delta.c.F7  ...  \
0         35.998557         21.717375        21.518280  ...   
1         13.425118         11.002916        11.942516  ...   
2         29.941780         27.544684        17.150159  ...   
3         21.496226         21.846832        17.364316  ...   
4         37.775667         33.607679        21.865556  ...   

   COH.F.gamma.o.Pz.s.O2  COH.F.gamma.p.P4.q.T6  COH.F.gamma.p.P4.r.O1  \
0              45.678820              30.167520              16.918761   
1              28.201062              57.108861     

In [27]:
# Replace every missing value in the frame with 0, then preview the result.
# NOTE(review): this also turns missing 'education' and 'IQ' entries into 0,
# which later statistics will treat as real measurements -- confirm that is
# intended.
df = df.fillna(value=0)
print(df.head(n=5))

   no. sex   age  education     IQ       main.disorder     specific.disorder  \
0    1   M  57.0        0.0    0.0  Addictive disorder  Alcohol use disorder   
1    2   M  37.0        6.0  120.0  Addictive disorder  Alcohol use disorder   
2    3   M  32.0       16.0  113.0  Addictive disorder  Alcohol use disorder   
3    4   M  35.0       18.0  126.0  Addictive disorder  Alcohol use disorder   
4    5   M  36.0       16.0  112.0  Addictive disorder  Alcohol use disorder   

   AB.A.delta.a.FP1  AB.A.delta.b.FP2  AB.A.delta.c.F7  ...  \
0         35.998557         21.717375        21.518280  ...   
1         13.425118         11.002916        11.942516  ...   
2         29.941780         27.544684        17.150159  ...   
3         21.496226         21.846832        17.364316  ...   
4         37.775667         33.607679        21.865556  ...   

   COH.F.gamma.o.Pz.s.O2  COH.F.gamma.p.P4.q.T6  COH.F.gamma.p.P4.r.O1  \
0              45.678820              30.167520              16.91