In [1]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# load and explore file
file = pd.read_csv("sentiments.csv")
file.head()

Unnamed: 0,Timestamp,What type of data interests you the most?
0,9/26/2022 21:20,"Financial, Sociological, Consumer Behavior, So..."
1,9/26/2022 21:20,"Financial, Consumer Behavior"
2,9/26/2022 21:20,"Educational, Psychological, Consumer Behavior,..."
3,9/26/2022 21:20,"Healthcare, Financial, Consumer Behavior"
4,9/26/2022 21:20,"Healthcare, Financial, Educational, Consumer B..."


In [3]:
# data can't be analyzed yet, let's transform it
file.describe()

Unnamed: 0,Timestamp,What type of data interests you the most?
count,17,17
unique,4,16
top,9/26/2022 21:21,"Healthcare, Financial, Educational, Consumer B..."
freq,9,2


In [4]:
# rename columns
file.columns = ["timestamp", "interests"]
file.head()

Unnamed: 0,timestamp,interests
0,9/26/2022 21:20,"Financial, Sociological, Consumer Behavior, So..."
1,9/26/2022 21:20,"Financial, Consumer Behavior"
2,9/26/2022 21:20,"Educational, Psychological, Consumer Behavior,..."
3,9/26/2022 21:20,"Healthcare, Financial, Consumer Behavior"
4,9/26/2022 21:20,"Healthcare, Financial, Educational, Consumer B..."


In [5]:
# create "dummy" variables
dfile = file.copy()
dfile = dfile.join(dfile.pop('interests').str.get_dummies(', '))
dfile.head()

Unnamed: 0,timestamp,Consumer Behavior,Educational,Entertainment/E-commerce,Financial,Healthcare,Meteorological,Mostly music and entertainment,Psychological,Social Media,Sociological,Transportation/Logistics field
0,9/26/2022 21:20,1,0,0,1,0,0,0,0,1,1,0
1,9/26/2022 21:20,1,0,0,1,0,0,0,0,0,0,0
2,9/26/2022 21:20,1,1,0,0,0,0,0,1,1,0,0
3,9/26/2022 21:20,1,0,0,1,1,0,0,0,0,0,0
4,9/26/2022 21:20,1,1,0,1,1,0,0,0,1,0,0


In [6]:
# remove timestamp
dfile = dfile.drop(axis=1, labels="timestamp")
dfile.head()

Unnamed: 0,Consumer Behavior,Educational,Entertainment/E-commerce,Financial,Healthcare,Meteorological,Mostly music and entertainment,Psychological,Social Media,Sociological,Transportation/Logistics field
0,1,0,0,1,0,0,0,0,1,1,0
1,1,0,0,1,0,0,0,0,0,0,0
2,1,1,0,0,0,0,0,1,1,0,0
3,1,0,0,1,1,0,0,0,0,0,0
4,1,1,0,1,1,0,0,0,1,0,0


In [7]:
# lets do some visualizations!
# corr heatmap
correlate = dfile.corr()
correlate

Unnamed: 0,Consumer Behavior,Educational,Entertainment/E-commerce,Financial,Healthcare,Meteorological,Mostly music and entertainment,Psychological,Social Media,Sociological,Transportation/Logistics field
Consumer Behavior,1.0,0.203091,-0.338502,0.632627,0.117698,-0.112367,0.184637,0.117698,0.696311,-0.13241,-0.338502
Educational,0.203091,1.0,-0.235702,-0.169031,0.169031,0.021517,-0.235702,0.169031,0.291667,-0.07043,-0.235702
Entertainment/E-commerce,-0.338502,-0.235702,1.0,0.209165,0.298807,0.684653,-0.0625,0.298807,-0.235702,0.298807,-0.0625
Financial,0.632627,-0.169031,0.209165,1.0,0.457143,0.305505,0.209165,-0.028571,0.30989,-0.028571,-0.298807
Healthcare,0.117698,0.169031,0.298807,0.457143,1.0,0.436436,-0.209165,-0.214286,-0.07043,-0.214286,-0.209165
Meteorological,-0.112367,0.021517,0.684653,0.305505,0.436436,1.0,-0.091287,0.436436,0.021517,0.436436,-0.091287
Mostly music and entertainment,0.184637,-0.235702,-0.0625,0.209165,-0.209165,-0.091287,1.0,0.298807,0.265165,-0.209165,-0.0625
Psychological,0.117698,0.169031,0.298807,-0.028571,-0.214286,0.436436,0.298807,1.0,0.408491,0.514286,-0.209165
Social Media,0.696311,0.291667,-0.235702,0.30989,-0.07043,0.021517,0.265165,0.408491,1.0,0.169031,-0.235702
Sociological,-0.13241,-0.07043,0.298807,-0.028571,-0.214286,0.436436,-0.209165,0.514286,0.169031,1.0,-0.209165


In [8]:
sns.heatmap(correlate)

ValueError: 'colorblind' is not a valid value for name; supported values are 'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r'

In [None]:
# get count of interests
count = dfile.sum(axis = 0, skipna = True).sort_values(ascending=False)
count

In [None]:
# plot with bar graph
count.plot(kind="bar")