In [1]:
#Import Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#Load Data
content = pd.read_csv('../Accenture/Data/Content.csv')
reactions = pd.read_csv('../Accenture/Data/Reactions.csv')
reaction_types = pd.read_csv('../Accenture/Data/ReactionTypes.csv')

In [3]:
#This code snippet drops the first column from the DataFrames and User ID
content.drop(content.columns[0],axis=1,inplace=True)
reactions.drop([reactions.columns[0],'User ID'],axis=1,inplace=True)
reaction_types.drop(reaction_types.columns[0],axis=1,inplace=True)

In [4]:
# Rename columns in content, reactions, and reaction_types
content.rename(columns={'Type': 'Content_Type'}, inplace=True)
reactions.rename(columns={'Type': 'Reaction_Type'}, inplace=True)
reaction_types.rename(columns={'Type': 'Reaction_Type'}, inplace=True)

In [5]:
#Merge the data
merged_data = pd.merge(reactions, content[['Content ID', 'Content_Type', 'Category']], on='Content ID')
merged_data = pd.merge(merged_data, reaction_types[['Reaction_Type', 'Sentiment', 'Score']], on='Reaction_Type')
merged_data.head()

Unnamed: 0,Content ID,Reaction_Type,Datetime,Content_Type,Category,Sentiment,Score
0,97522e57-d9ab-4bd6-97bf-c24d952602d2,disgust,2020-11-07 09:43:50,photo,Studying,negative,0
1,97522e57-d9ab-4bd6-97bf-c24d952602d2,disgust,2021-01-06 19:13:01,photo,Studying,negative,0
2,97522e57-d9ab-4bd6-97bf-c24d952602d2,disgust,2021-04-09 02:46:20,photo,Studying,negative,0
3,9f737e0a-3cdd-4d29-9d24-753f4e3be810,disgust,2021-03-28 21:15:26,photo,healthy eating,negative,0
4,230c4e4d-70c3-461d-b42c-ec09396efb3f,disgust,2020-08-04 05:40:33,photo,healthy eating,negative,0


In [6]:
# Select the desired columns
merged_data = merged_data[['Content ID', 'Reaction_Type', 'Datetime', 'Content_Type', 'Category', 'Sentiment', 'Score']]

In [7]:
merged_data['Category'].unique()

array(['Studying', 'healthy eating', 'dogs', 'public speaking', 'science',
       'tennis', 'food', 'fitness', 'soccer', 'education', 'studying',
       'travel', 'veganism', 'cooking', 'technology', 'animals',
       'culture', 'Fitness', 'Veganism', '"animals"', 'Travel',
       '"soccer"', 'Animals', 'Education', '"dogs"', '"culture"',
       'Soccer', 'Culture', 'Food', '"technology"', '"cooking"',
       '"public speaking"', 'Science', '"veganism"', 'Public Speaking',
       '"science"', '"studying"', '"food"', 'Healthy Eating',
       'Technology'], dtype=object)

In [8]:
#address the reapted column (especially for categorical varibale)!!
merged_data['Category']= merged_data['Category'].replace('"', '', regex=True)
merged_data['Category']=merged_data['Category'].str.lower()

In [9]:
merged_data['Category'].unique()

array(['studying', 'healthy eating', 'dogs', 'public speaking', 'science',
       'tennis', 'food', 'fitness', 'soccer', 'education', 'travel',
       'veganism', 'cooking', 'technology', 'animals', 'culture'],
      dtype=object)

In [10]:
#Top Quantity
top_quantity = merged_data['Category'].value_counts().head(5).reset_index().rename(columns={'index': 'Category', 'Category': 'Count'})
top_quantity.head()

Unnamed: 0,Count,count
0,animals,1897
1,science,1796
2,healthy eating,1717
3,food,1699
4,technology,1698


In [11]:
#Top Scores
top_scores = merged_data.groupby('Category')['Score'].sum().reset_index().sort_values(by='Score', ascending=False).head(5)
top_scores.head()

Unnamed: 0,Category,Score
0,animals,74965
9,science,71168
7,healthy eating,69339
12,technology,68738
6,food,66676


In [12]:
# Save the data
merged_data.to_csv('../Accenture/Data/merged_data.csv', index=False)
top_quantity.to_csv('../Accenture/Data/top_quantity.csv', index=False)
top_scores.to_csv('../Accenture/Data/top_scores.csv', index=False)