In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 📌 Visualization Setup
# Use the "ggplot" style sheet and make every new figure 10 x 5 (width, height) by default.
plt.style.use("ggplot")
plt.rcParams.update({'figure.figsize': (10, 5)})


In [None]:
# 📂 Load Datasets
# Every CSV is read from the same folder. Edit DATA_DIR (and nothing else) to
# point the notebook at a different copy of the data.
DATA_DIR = "C:/Users/luxmi/Downloads"

# One DataFrame per CSV file; variable names mirror the file names.
users = pd.read_csv(f"{DATA_DIR}/users.csv")
photos = pd.read_csv(f"{DATA_DIR}/photos.csv")
likes = pd.read_csv(f"{DATA_DIR}/likes.csv")
comments = pd.read_csv(f"{DATA_DIR}/comments.csv")
follows = pd.read_csv(f"{DATA_DIR}/follows.csv")
photo_tags = pd.read_csv(f"{DATA_DIR}/photo_tags.csv")
tags = pd.read_csv(f"{DATA_DIR}/tags.csv")


In [None]:
# 🧹 Rename Inconsistent Column Names and Normalize Tag Text
# Each frame is rebound to a renamed copy instead of being mutated in place.
# rename() ignores labels that are not present, so re-running this cell after
# the columns have already been renamed is harmless.
photos = photos.rename(columns={'user ID': 'user_id'})
likes = likes.rename(columns={'photo': 'photo_id'})
comments = comments.rename(columns={'Photo id': 'photo_id'})

# Lower-case the tag text so spellings that differ only by case are counted
# as the same tag in the later value_counts() on this column.
tags = tags.assign(**{'tag text': tags['tag text'].str.lower()})


In [None]:
# 📊 1. Top 10 Users by Photo Uploads - Horizontal Bar Chart
# value_counts() orders by frequency (highest first); keep the first ten user IDs.
top_users = photos['user_id'].value_counts().head(10)

fig, ax = plt.subplots()
# Bars are drawn in ascending order of photo count.
top_users.sort_values().plot.barh(ax=ax, color='slateblue')
ax.set_title("Top 10 Users by Photo Uploads")
ax.set_xlabel("Number of Photos")
ax.set_ylabel("User ID")
fig.tight_layout()
plt.show()


In [None]:
# ❤️ 2. Top 10 Most Liked Photos - Pie Chart
# Number of like rows per photo, limited to the ten most frequent photo IDs.
top_liked = likes['photo_id'].value_counts().head(10)

fig, ax = plt.subplots()
# Each wedge is labelled with its photo ID; the percentages are shares of the
# likes among these ten photos only, not of all likes.
ax.pie(
    top_liked,
    labels=top_liked.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("pastel"),
)
ax.set_title("Top 10 Most Liked Photos")
ax.axis('equal')  # equal axis scaling so the pie is drawn as a circle
fig.tight_layout()
plt.show()


In [None]:
# 💬 3. Top 10 Most Commented Photos - Line Chart
# Number of comment rows per photo, limited to the ten most frequent photo IDs.
top_commented = comments['photo_id'].value_counts().head(10)

fig, ax = plt.subplots()
# Photo IDs are converted to strings before plotting so they are passed to
# matplotlib as text labels rather than as numbers.
ax.plot(
    top_commented.index.astype(str),
    top_commented.values,
    marker='o',
    linestyle='--',
    color='teal',
)
ax.set_title("Top 10 Most Commented Photos")
ax.set_xlabel("Photo ID")
ax.set_ylabel("Comments")
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# 🏷️ 4. Most Used Tags - Dot Plot (Seaborn Stripplot)
# Count how often each tag text appears and keep the ten most frequent values.
tag_counts = tags['tag text'].value_counts().head(10)

# Two-column frame for seaborn: the tag text and its number of occurrences.
most_tags = pd.DataFrame({'Tag': tag_counts.index, 'Count': tag_counts.values})

fig, ax = plt.subplots()
sns.stripplot(data=most_tags, x="Count", y="Tag", size=10, color='darkorange', ax=ax)
ax.set_title("Most Used Tags")
fig.tight_layout()
plt.show()


In [None]:
# 📍 5. Top 10 Locations by Tag Usage - Horizontal Bar Chart
# Number of rows in `tags` per location value, limited to the ten most frequent.
top_locations = tags['location'].value_counts().head(10)

fig, ax = plt.subplots()
# Bars are drawn in ascending order of count.
top_locations.sort_values().plot.barh(ax=ax, color='mediumseagreen')
ax.set_title("Top 10 Locations by Tag Usage")
ax.set_xlabel("Tag Count")
ax.set_ylabel("Location")
fig.tight_layout()
plt.show()
