In [1]:
import pandas as pd
import numpy as np 
import os
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv('drug_consumption.data',header=None)

In [3]:
data = data.iloc[:, 1:]

In [4]:
data.rename(columns={1: "Age", 2: "Gender", 3: "Education",4: "Country",
                     5: "Ethnicity",6: "Neuroticism",7: "Extraversion",8: "Openness",
                     9: "Agreeableness",10: "Conscientiouness",11: "Impulsiveness",12: "Sensation Seing",
                     13: "Alcohol",14: "Amphet",15: "Amyl",16: "Benzos",
                     17: "Caff",18: "Cannabis",19: "Choc",20: "Coke",
                     21: "Crack",22: "Ecstasy",23: "Heroin",24: "Ketamine",
                     25: "Legalh",26: "LSD",27: "Meth",28: "Mushrooms",
                     29: "Nicotine",30: "Semer",31: "VSA"}, inplace = True)

In [5]:
Personality_measure = ['Neuroticism','Extraversion','Openness','Agreeableness','Conscientiouness','Impulsiveness','Sensation Seing']
Drugs = ['Alcohol','Amphet','Amyl','Benzos','Caff','Cannabis','Choc','Coke','Crack','Ecstasy','Heroin','Ketamine','Legalh','LSD','Meth','Mushrooms','Nicotine','Semer','VSA']
Participants = ['Age','Gender','Education','Country','Ethnicity']


# data set especifico para os bar plots

In [6]:
slider = data.copy()

In [7]:
for drug in Drugs:
    slider.loc[slider[drug] == 'CL0', drug] = 'Never'
    slider.loc[slider[drug] == 'CL1', drug] = 'More than a year'
    slider.loc[slider[drug] == 'CL2', drug] = 'More than a year'
    slider.loc[slider[drug] == 'CL3', drug] = 'More than a year'
    slider.loc[slider[drug] == 'CL4', drug] = 'Less than a year'
    slider.loc[slider[drug] == 'CL5', drug] = 'Less than a year'
    slider.loc[slider[drug] == 'CL6', drug] = 'Less than a year'

In [8]:
usage_rate = ['Never', 'More than a year', 'Less than a year']

In [9]:
never_list = []
for drug in Drugs:
    never_list.append((slider[drug]=='Never').sum())
    
more_year_list = []
for drug in Drugs:
    more_year_list.append((slider[drug]=='More than a year').sum())
    
less_year_list = []    
for drug in Drugs:
    less_year_list.append((slider[drug]=='Less than a year').sum())


### fazer dataset com o numero de acontecimentos do tipo never, more than a year and ....

In [10]:
d = {'Never': never_list, 'More Than a Year Ago': more_year_list, 'Less Than a Year Ago':less_year_list}

In [11]:
df_slider = pd.DataFrame(data=d)

In [12]:
df_slider = df_slider.set_axis(Drugs)

In [13]:
df_slider = df_slider.transpose()

In [None]:
df_slider.to_csv('histogram_usage.csv')

# Data set especifico para o heatmap

In [14]:
heatmap = data.copy()

In [15]:
for drug in Drugs:
    heatmap.loc[heatmap[drug] == 'CL0', drug] = 0
    heatmap.loc[heatmap[drug] == 'CL1', drug] = 1
    heatmap.loc[heatmap[drug] == 'CL2', drug] = 2
    heatmap.loc[heatmap[drug] == 'CL3', drug] = 3
    heatmap.loc[heatmap[drug] == 'CL4', drug] = 4
    heatmap.loc[heatmap[drug] == 'CL5', drug] = 5
    heatmap.loc[heatmap[drug] == 'CL6', drug] = 6

In [16]:
scaler = MinMaxScaler()
scaler.fit(heatmap[Drugs])
scaler_data = scaler.transform(heatmap[Drugs])

In [17]:
df_heatmap = pd.DataFrame(scaler_data, columns = Drugs)

In [18]:
df_heatmap = pd.concat([heatmap[Personality_measure],heatmap[Participants],df_heatmap ], axis=1)

In [19]:
corrM = df_heatmap.corr()

In [20]:
CorrM = corrM[Drugs]

In [21]:
CorrM.drop(Drugs, inplace= True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [22]:
CorrM.reset_index(level=0, inplace = True)

In [33]:
CorrM.to_csv('correlation_heatmap.csv')

# data set especifico para o radar chart

In [23]:
radar = data.copy()

### age

In [24]:

radar.loc[radar['Age'] == -0.95197, 'Age'] = '18-24'
radar.loc[radar['Age'] == -0.07854, 'Age'] = '25-34'
radar.loc[radar['Age'] == 0.49788, 'Age'] = '35-44'
radar.loc[radar['Age'] == 1.09449, 'Age'] = '45-54'
radar.loc[radar['Age'] == 1.82213, 'Age'] = '55-64'
radar.loc[radar['Age'] == 2.59171, 'Age'] = '65+'

In [25]:
radar.loc[radar['Gender'] == -0.48246, 'Gender'] = 'Male'
radar.loc[radar['Gender'] == 0.48246, 'Gender'] = 'Female'

In [26]:
radar.loc[radar['Education'] == -2.43591 , 'Education'] = 'School Dropout'
radar.loc[radar['Education'] == -1.73790, 'Education'] = 'School Dropout'
radar.loc[radar['Education'] == -1.43719 , 'Education'] = 'School Dropout'
radar.loc[radar['Education'] == -1.22751 , 'Education'] = 'School Dropout'
radar.loc[radar['Education'] == -0.61113, 'Education'] = 'college - no degree'
radar.loc[radar['Education'] == -0.05921, 'Education'] = 'Professional certificate'
radar.loc[radar['Education'] == 0.45468, 'Education'] = 'University Degree'
radar.loc[radar['Education'] == 1.16365, 'Education'] = 'Masters or Doctorate Degree'
radar.loc[radar['Education'] == 1.98437, 'Education'] = 'Masters or Doctorate Degree'

In [27]:
scaler_radar = MinMaxScaler(feature_range = (0,20))
scaler_radar.fit(radar[Personality_measure])
scaled_radar = scaler_radar.transform(radar[Personality_measure])

In [28]:
df_radar = pd.DataFrame(scaled_radar, columns = Personality_measure)
df_radar = pd.concat([radar[Drugs],radar[Participants],df_radar ], axis=1)
df_radar.drop(['Country','Ethnicity'],axis=1)

Unnamed: 0,Alcohol,Amphet,Amyl,Benzos,Caff,Cannabis,Choc,Coke,Crack,Ecstasy,...,Age,Gender,Education,Neuroticism,Extraversion,Openness,Agreeableness,Conscientiouness,Impulsiveness,Sensation Seing
0,CL5,CL2,CL0,CL2,CL6,CL0,CL5,CL0,CL0,CL0,...,35-44,Female,Professional certificate,11.211242,8.242327,8.713797,7.353075,9.980805,8.569486,4.487964
1,CL5,CL2,CL2,CL0,CL6,CL4,CL6,CL3,CL0,CL4,...,25-34,Male,Masters or Doctorate Degree,8.269487,15.922118,15.251330,12.196538,9.587889,6.758405,9.313161
2,CL6,CL0,CL0,CL0,CL6,CL3,CL4,CL0,CL0,CL0,...,35-44,Male,Professional certificate,8.895758,12.459521,7.858778,5.321214,7.071609,4.308017,12.399149
3,CL4,CL0,CL0,CL3,CL5,CL2,CL4,CL2,CL0,CL0,...,18-24,Female,Masters or Doctorate Degree,9.840894,7.537669,10.540455,11.704269,11.688306,4.308017,4.487964
4,CL4,CL1,CL1,CL0,CL6,CL3,CL6,CL0,CL0,CL1,...,35-44,Female,Masters or Doctorate Degree,12.465507,5.010889,9.139897,9.129074,13.770162,8.569486,9.313161
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1880,CL5,CL0,CL0,CL0,CL4,CL5,CL4,CL0,CL0,CL0,...,18-24,Female,college - no degree,6.737793,15.317493,16.707980,12.196538,6.715468,12.594702,20.000000
1881,CL5,CL0,CL0,CL0,CL5,CL3,CL4,CL0,CL0,CL2,...,18-24,Male,college - no degree,9.550999,15.317493,12.491993,12.196538,5.617084,12.594702,14.218654
1882,CL4,CL6,CL5,CL5,CL6,CL6,CL6,CL4,CL0,CL4,...,25-34,Female,University Degree,13.644916,5.795909,6.471985,4.885058,6.002090,11.306853,7.762342
1883,CL5,CL0,CL0,CL0,CL6,CL6,CL5,CL0,CL0,CL3,...,18-24,Female,college - no degree,12.986351,4.130204,11.553030,5.321214,2.572683,14.101359,16.515033


In [29]:
for drug in Drugs:
    df_radar.loc[df_radar[drug] == 'CL0', drug] = 'Never'
    df_radar.loc[df_radar[drug] == 'CL1', drug] = 'More than a year'
    df_radar.loc[df_radar[drug] == 'CL2', drug] = 'More than a year'
    df_radar.loc[df_radar[drug] == 'CL3', drug] = 'More than a year'
    df_radar.loc[df_radar[drug] == 'CL4', drug] = 'Less than a year'
    df_radar.loc[df_radar[drug] == 'CL5', drug] = 'Less than a year'
    df_radar.loc[df_radar[drug] == 'CL6', drug] = 'Less than a year'

In [30]:
df_radar.drop(['Country','Ethnicity'],axis=1, inplace=True)

In [65]:
df_radar.to_csv('radar_chart.csv')