In [None]:
import pandas as pd

# Disable truncation so printed DataFrames show every row and every column.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Read the CSV file into a DataFrame
df_songs = pd.read_csv('songs.xlsx - Sheet1.csv')

# Display the first 5 rows as a left-aligned Markdown table
# (`to_markdown` relies on the optional `tabulate` package).
print(df_songs.head().to_markdown(index=False, numalign="left", stralign="left"))

# Print the column names and their data types.
# `DataFrame.info()` writes its report to stdout itself and returns None, so it
# must not be wrapped in `print()` -- that would append a stray "None" line.
df_songs.info()


In [None]:
import ast  # standard library; safe parsing of list-literal tag strings


def _parse_tags(raw):
    """Return one row's tags in a form that ``explode`` can split.

    Columns loaded with ``read_csv`` hold plain strings, and ``explode`` only
    splits list-like values, so a serialized list has to be parsed first.

    NOTE(review): assumes tags were serialized as a Python list literal such
    as "['a', 'b']" -- confirm against the actual CSV. Any other value
    (missing value, plain string, unparsable text) is returned unchanged and
    is therefore counted as a single tag, exactly as before.
    """
    if isinstance(raw, (list, tuple)):
        return list(raw)
    if isinstance(raw, str) and raw.lstrip().startswith('['):
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return raw
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
    return raw


# Drop the `Unnamed: 0` column if it is present.
# `errors='ignore'` keeps this cell re-runnable and tolerant of CSV exports
# that do not carry the extra index column.
df_songs.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)

# Convert `publishedAt` to datetime
df_songs['publishedAt'] = pd.to_datetime(df_songs['publishedAt'])

# Extract year and month
df_songs['year'] = df_songs['publishedAt'].dt.year
df_songs['month'] = df_songs['publishedAt'].dt.month

# Filter for T-Series (copy so the derived columns below do not touch df_songs)
df_t_series = df_songs[df_songs['channelTitle'] == 'T-Series'].copy()

# Calculate engagement metrics.
# Rows with zero views are masked to NaN first: dividing by 0 would yield
# inf, and a single inf makes the monthly mean inf. NaN is skipped by `mean`.
views = df_t_series['viewCount'].where(df_t_series['viewCount'] != 0)
df_t_series['likes_per_view'] = df_t_series['likeCount'] / views
df_t_series['comments_per_view'] = df_t_series['commentCount'] / views

count_columns = ['viewCount', 'likeCount', 'commentCount']

# Aggregate by channel
df_agg_channel = df_t_series.groupby('channelTitle')[count_columns].sum().reset_index()

# Group by year and month once; reused for both time-based aggregations
by_month = df_t_series.groupby(['year', 'month'])

# Aggregate by year and month
df_agg_time = by_month[count_columns].sum().reset_index()

# Explode tags and aggregate: parse each row's tags into a list, emit one row
# per tag, then count occurrences (missing tags are dropped by value_counts).
df_agg_tags = df_t_series['tags'].map(_parse_tags).explode().value_counts().reset_index()
df_agg_tags.columns = ['tag', 'frequency']

# Aggregate engagement metrics by year and month
df_agg_engagement = by_month[['likes_per_view', 'comments_per_view']].mean().reset_index()

# Print the results
print("Aggregated metrics by channel:")
print(df_agg_channel.to_markdown(index=False, numalign="left", stralign="left"))

print("\nTop tags:")
print(df_agg_tags.head(10).to_markdown(index=False, numalign="left", stralign="left"))

print("\nAggregated metrics by year and month:")
print(df_agg_time.to_markdown(index=False, numalign="left", stralign="left"))

print("\nAggregated engagement metrics by year and month:")
print(df_agg_engagement.to_markdown(index=False, numalign="left", stralign="left"))
