<a href="https://www.kaggle.com/code/ahmedanwar89/accenture-virtual-experience-ve-by-forage?scriptVersionId=143632311" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Switch on seaborn's default theme so the matplotlib figures below pick up its styling.
sns.set()

# import & prepare dataset

In [None]:
# All three tables are read from the same input folder. Keeping that folder in a
# single constant means the notebook can be pointed at another copy of the data
# by editing one line instead of three.
DATA_DIR = '/kaggle/input/accenture-ve-dataset/accenture_data'

content = pd.read_csv(f'{DATA_DIR}/Content.csv')
reactions = pd.read_csv(f'{DATA_DIR}/Reactions.csv')
reactions_type = pd.read_csv(f'{DATA_DIR}/ReactionTypes.csv')

In [None]:
# Remove the 'Unnamed: 0' column from each of the three tables.
# NOTE(review): presumably this is a row index that was written into the CSVs — confirm.
# Reassigning the result (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution is a no-op rather than a KeyError.
content = content.drop(columns='Unnamed: 0', errors='ignore')
reactions = reactions.drop(columns='Unnamed: 0', errors='ignore')
reactions_type = reactions_type.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Give the content table's 'Type' column a distinct name before the tables are merged.
content.rename({'Type': 'content_type'}, axis='columns', inplace=True)

In [None]:
# Inner-join content with reactions: a row is kept only when both 'Content ID'
# and 'User ID' match between the two tables.
# NOTE(review): this assumes 'User ID' identifies the same user in both tables — confirm.
df = content.merge(reactions, how='inner', on=['Content ID', 'User ID'])

In [None]:
# Attach the matching reactions_type row to every record through the shared 'Type' column.
# This cell reads and overwrites df, so run it exactly once after the previous merge.
df = df.merge(reactions_type, how='inner', on='Type')

# clean data

In [None]:
# Inspect the dtype of every column in the merged frame to spot columns that need conversion.
df.dtypes

In [None]:
# Parse the 'Datetime' column into pandas datetimes, store it back on the frame,
# then display the column's dtype to confirm the conversion.
parsed_datetime = pd.to_datetime(df['Datetime'])
df['Datetime'] = parsed_datetime
df.dtypes['Datetime']

In [None]:
# Null check: per column, whether any value is missing and how many are missing.
null_mask = df.isna()
null_mask.any(), null_mask.sum()

In [None]:
# Drop the 'URL' column.
# NOTE(review): presumably it is dropped because of the null check above — confirm.
# Reassigning with errors='ignore' (instead of inplace=True) keeps the cell
# re-runnable: executing it again is a no-op rather than a KeyError.
df = df.drop(columns='URL', errors='ignore')

In [None]:
# Check for fully duplicated rows after the merges: is there any, and how many.
duplicate_mask = df.duplicated()
duplicate_mask.any(), duplicate_mask.sum()

In [None]:
# Data validity check: count the distinct values in each object-dtype column.
object_columns = df.select_dtypes(include='object')
object_columns.nunique()

In [None]:
# List the distinct content_type values to look for misspelled or inconsistent labels.
df['content_type'].unique()

In [None]:
# List the distinct Sentiment values to look for misspelled or inconsistent labels.
df['Sentiment'].unique()

In [None]:
# List the distinct Type values to look for misspelled or inconsistent labels.
df['Type'].unique()

# Analysis

In [None]:
# Overview of the cleaned frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Descriptive statistics for the columns that describe() summarises by default.
df.describe()

## **What is the distribution of the Score values?**

In [None]:
# Histogram of the 'Score' column, split into 8 bins.
fig, ax = plt.subplots(figsize=(12, 4))
ax.hist(df['Score'], bins=8)
ax.set_title('Score Distribution', fontdict={'size': 16, 'weight': 'bold'})
ax.set_xlabel('Score', fontdict={'size': 12, 'weight': 'bold'})
ax.set_ylabel('Count', fontdict={'size': 12, 'weight': 'bold'})
plt.show()

## **What is the count for each Sentiment?**

In [None]:
# Number of rows for each Sentiment value.
df['Sentiment'].value_counts()

In [None]:
# Horizontal bar chart: one bar per Sentiment value, bar length = number of rows.
sentiment_counts = df['Sentiment'].value_counts()
fig, ax = plt.subplots(figsize=(12, 4))
ax.barh(y=sentiment_counts.index, width=sentiment_counts.values)
ax.set_title('count of each sentiment', fontdict={'size': 16, 'weight': 'bold'})
ax.set_xlabel('Count', fontdict={'size': 12, 'weight': 'bold'})
ax.set_ylabel('Sentiment', fontdict={'size': 12, 'weight': 'bold'})
plt.show()

## **What is the count for each Category?**

In [None]:
# Number of rows for each Category value.
df['Category'].value_counts()

In [None]:
# Vertical bar chart: one bar per Category value, bar height = number of rows.
category_counts = df['Category'].value_counts()
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(x=category_counts.index, height=category_counts.values)
ax.set_title('count of each category', fontdict={'size': 16, 'weight': 'bold'})
ax.set_xlabel('Category')
ax.set_ylabel('Count')
plt.show()

## **What is the count for each content_type?**

In [None]:
# Number of rows for each content_type value.
df['content_type'].value_counts()

In [None]:
# Horizontal bar chart: one bar per content_type value, bar length = number of rows.
# The title previously read 'count of each category' (copied from the Category
# chart); it now names the column that is actually plotted.
# The counts are computed once, and the unused data=df argument is dropped because
# y and width are passed as explicit arrays, not as column names.
content_type_counts = df['content_type'].value_counts()
plt.figure(figsize=(20, 5))
plt.barh(y=content_type_counts.index, width=content_type_counts.values)
plt.title('count of each content type', fontdict={'size': 16, 'weight': 'bold'})
plt.xlabel('Count')
plt.ylabel('Content Type')
plt.show()

## **What is the count for each Type?**

In [None]:
# Number of rows for each Type value.
df['Type'].value_counts()

In [None]:
# Vertical bar chart: one bar per Type value, bar height = number of rows.
# The title previously read 'count of each category' (copied from the Category
# chart); it now names the column that is actually plotted.
# The counts are computed once, and the unused data=df argument is dropped because
# x and height are passed as explicit arrays, not as column names.
type_counts = df['Type'].value_counts()
plt.figure(figsize=(20, 5))
plt.bar(x=type_counts.index, height=type_counts.values)
plt.title('count of each type', fontdict={'size': 16, 'weight': 'bold'})
plt.xlabel('Type')
plt.ylabel('Count')
plt.show()