In [26]:
import pandas as pd
import numpy as np

df = pd.read_excel("Tweet.xlsx")

df['time'] = pd.to_datetime(df['time'], errors='coerce')

df["Word_Count"] = df["Tweet"].apply(lambda x: len(str(x).split()))
df["Char_Count"] = df["Tweet"].apply(lambda x: len(str(x)))

df["Hour"] = df["time"].dt.hour
df["Day"] = df["time"].dt.day
df["Month"] = df["time"].dt.month
df["Weekday"] = df["time"].dt.day_name()

if "engagement rate" not in df.columns:
    df["engagement rate"] = df["engagements"] / df["impressions"]

task1 = df[
    (df["replies"] > 10) &
    (df["Word_Count"] > 50) &
    (df["Day"] % 2 != 0) &  # Odd date
    (df["Hour"].between(18, 23)) &  # 6 PM – 11 PM
    (df["engagement rate"] > 0.05)
][["Tweet", "media engagements", "media views", "replies", "engagement rate", "Word_Count", "time"]]

print("✅ TASK 1 DATA READY:", task1.shape)

✅ TASK 1 DATA READY: (0, 7)


In [27]:
task2 = df[
    ((df["url clicks"] > 0) | (df["user profile clicks"] > 0) | (df["hashtag clicks"] > 0)) &
    (df["Word_Count"] > 40) &
    (df["Day"] % 2 == 0) &  # Even date
    (df["Hour"].between(15, 17))  # 3 PM – 5 PM
][["Tweet", "url clicks", "user profile clicks", "hashtag clicks", "time", "Word_Count"]]

print("✅ TASK 2 DATA READY:", task2.shape)

df["Retweets_Likes_Sum"] = df["retweets"] + df["likes"]

✅ TASK 2 DATA READY: (0, 6)


In [47]:
df.columns = [c.strip().lower().replace(" ", "_") for c in df.columns]

required_cols = ["retweets", "likes", "impressions", "time", "tweet"]
for col in required_cols:
    if col not in df.columns:
        print(f"⚠️ Missing column: {col}")

for col in ["retweets", "likes", "impressions"]:
    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

df["retweets_likes_sum"] = df["retweets"] + df["likes"]
df["hour"] = df["time"].dt.hour
df["day"] = df["time"].dt.day
df["weekday"] = df["time"].dt.day_name()
df["word_count"] = df["tweet"].apply(lambda x: len(str(x).split()))

task3 = df[
    (~df["weekday"].isin(["Saturday", "Sunday"])) & 
    (df["impressions"] % 2 == 0) &                   
    (df["day"] % 2 != 0) &                           
    (df["word_count"] < 30) &                       
    (df["hour"].between(15, 17))                    
].sort_values("retweets_likes_sum", ascending=False).head(10)

cols_to_show = [col for col in ["tweet", "retweets_likes_sum", "time", "user_profile", "word_count"] if col in df.columns]
task3 = task3[cols_to_show]

print("✅ TASK 3 FIXED DATA READY:", task3.shape)

✅ TASK 3 FIXED DATA READY: (10, 4)


In [49]:
task4 = df[
    (df.get("engagements", 0) % 2 == 0) &
    (df["day"] % 2 != 0) &
    (df["char_count"] > 20) &
    (df["hour"].between(7, 11) | df["hour"].between(15, 17))
].copy()

task4["tweet_cleaned"] = task4["tweet"].apply(lambda x: " ".join([w for w in str(x).split() if "c" not in w.lower()]))

if "has_media" in task4.columns:
    task4 = task4.groupby([task4["time"].dt.month, "has_media"])["engagement_rate"].mean().reset_index()
    task4.columns = ["month", "has_media", "avg_engagement_rate"]
else:
    task4 = task4.groupby(task4["time"].dt.month)["engagement_rate"].mean().reset_index()
    task4.columns = ["month", "avg_engagement_rate"]

print("✅ Task 4 completed:", task4.shape)

✅ Task 4 completed: (5, 2)


In [52]:
median_media_eng = df["media_engagements"].median() if "media_engagements" in df.columns else 0

task5 = df[
    (df.get("media_engagements", 0) > median_media_eng) &
    (df["month"].between(6, 8)) &
    (df["day"] % 2 != 0) &
    (df.get("media_views", 0) % 2 == 0) &
    (df["char_count"] > 20) &
    (df["hour"].between(7, 11) | df["hour"].between(15, 17))
].copy()

task5["tweet_cleaned"] = task5["tweet"].apply(lambda x: " ".join([w for w in str(x).split() if "s" not in w.lower()]))
task5 = task5[["tweet_cleaned", "replies", "retweets", "likes", "time"]].copy()

print("✅ Task 5 completed:", task5.shape)

✅ Task 5 completed: (22, 5)


In [53]:
task6 = df[
    (df["weekday"].isin(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"])) &
    (df.get("impressions", 0) % 2 == 0) &
    (df["day"] % 2 != 0) &
    (df["char_count"] > 30) &
    (df["hour"].between(7, 11) | df["hour"].between(12, 18))
].copy()

task6["tweet_cleaned"] = task6["tweet"].apply(lambda x: " ".join([w for w in str(x).split() if "d" not in w.lower()]))

if "has_app_opens" in task6.columns:
    task6_grouped = task6.groupby("has_app_opens")["engagement_rate"].mean().reset_index()
    task6_grouped.columns = ["has_app_opens", "avg_engagement_rate"]
else:
    task6_grouped = task6[["engagement_rate"]].mean().reset_index()

print("✅ Task 6 completed:", task6_grouped.shape)

✅ Task 6 completed: (1, 2)
