<a href="https://colab.research.google.com/github/MonaFaghfouri/Descriptive-data-analysis/blob/main/Hashtags_Filtered_By_Users.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install arabic_reshaper python-bidi -q

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.font_manager as font_manager
import arabic_reshaper
from bidi.algorithm import get_display
from google.colab import files

# Ask for the spreadsheet and load the first uploaded entry into a DataFrame
print("Upload your Excel file:")
uploaded = files.upload()
excel_name = next(iter(uploaded))  # first key of the upload result is used as the path
df = pd.read_excel(excel_name)

# Ask for a Persian font file (e.g. IRANSans, B Nazanin), register it with
# matplotlib, and keep its family name for the text calls further down
print("Upload your font file (e.g., .ttf):")
font_uploaded = files.upload()
font_path = next(iter(font_uploaded))
font_manager.fontManager.addfont(font_path)
font_props = font_manager.FontProperties(fname=font_path)
farsi_font = font_props.get_name()

# Helper: fix Persian/Arabic text for matplotlib
def fix_farsi(text):
    """Return *text* reshaped and bidi-reordered for display in matplotlib."""
    reshaped = arabic_reshaper.reshape(text)
    return get_display(reshaped)

# Parse the timestamp strings held in the third column
# (layout per the format string: weekday, month, day, time, UTC offset, year)
dates = pd.to_datetime(df.iloc[:, 2], format="%a %b %d %H:%M:%S %z %Y")

# Tally tweets for each calendar date, ordered chronologically
tweet_days = dates.dt.date
daily_counts = tweet_days.value_counts().sort_index()

# Busiest day and the number of tweets on it
peak_value = daily_counts.max()
peak_date = daily_counts.idxmax()

# Draw the daily series on a fresh figure
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(daily_counts.index, daily_counts.values, label=fix_farsi("تعداد توییت‌ها"))

# Mark the peak day with a dashed vertical line that also appears in the legend
ax.axvline(
    peak_date,
    color='red',
    linestyle='--',
    linewidth=2,
    label=fix_farsi(f"بیشترین ({peak_value}) در {peak_date}"),
)

# Axis labels, title and grid (Persian text goes through fix_farsi first)
ax.set_xlabel(fix_farsi("تاریخ"), fontsize=12, fontname=farsi_font)
ax.set_ylabel(fix_farsi("تعداد توییت"), fontsize=12, fontname=farsi_font)
ax.set_title(fix_farsi("نمودار تعداد توییت‌های روزانه"), fontsize=14, fontname=farsi_font)
ax.grid(True)

# X-axis: span the whole observed range with one "YYYY-MM" tick per month
first_day = daily_counts.index.min()
last_day = daily_counts.index.max()
ax.set_xlim([first_day, last_day])
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Must run after the locator/formatter so the styled labels are the final ones
plt.xticks(rotation=45, fontname=farsi_font)

# Legend rendered with the uploaded font
ax.legend(prop={'family': farsi_font})

# Write the figure to disk, then hand it to the browser for download
output_filename = "daily_tweet_counts.png"
fig.savefig(output_filename, dpi=300, bbox_inches='tight')
files.download(output_filename)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.font_manager as font_manager
import arabic_reshaper
from bidi.algorithm import get_display
from google.colab import files

# Spreadsheet upload: the first key of the upload result is read as an Excel path
print("Upload Excel file:")
uploaded = files.upload()
workbook_name = next(iter(uploaded))
df = pd.read_excel(workbook_name)

# Font upload: register the file with matplotlib and look up its family name
print(" Upload Persian font:")
font_uploaded = files.upload()
font_path = next(iter(font_uploaded))
font_manager.fontManager.addfont(font_path)
uploaded_font = font_manager.FontProperties(fname=font_path)
farsi_font = uploaded_font.get_name()

#  Function to fix Persian text
def fix_farsi(text):
    """Reshape Persian *text* and reorder it for display; return the result."""
    joined_glyphs = arabic_reshaper.reshape(text)
    display_text = get_display(joined_glyphs)
    return display_text

#  Parse the timestamp strings in the 3rd column and derive the calendar date
df['datetime'] = pd.to_datetime(df.iloc[:, 2], format="%a %b %d %H:%M:%S %z %Y")
df['date'] = df['datetime'].dt.date

#  Count tweets per day
daily_counts = df['date'].value_counts().sort_index()
df_daily = pd.DataFrame({
    # Explicit DatetimeIndex so the frame can be put on a regular daily grid
    'Date': pd.to_datetime(daily_counts.index),
    'TweetCount': daily_counts.values,
})
df_daily.set_index('Date', inplace=True)

# value_counts() has no row for a day without tweets. Without a complete daily
# index, rolling(window=N) would average N *rows* rather than N calendar days,
# contradicting the "7/30/90-day" legend labels. Insert the missing days with a
# count of 0 so every window covers exactly N consecutive days.
df_daily = df_daily.asfreq('D', fill_value=0)

#  Moving averages over 7, 30 and 90 consecutive days
#  (the first N-1 entries of each are NaN because the window is not yet full)
df_daily['MA_7'] = df_daily['TweetCount'].rolling(window=7).mean()
df_daily['MA_30'] = df_daily['TweetCount'].rolling(window=30).mean()
df_daily['MA_90'] = df_daily['TweetCount'].rolling(window=90).mean()

#  Plot the raw daily counts together with the three moving averages
plt.figure(figsize=(12, 6))
plt.plot(df_daily.index, df_daily['TweetCount'], label=fix_farsi('تعداد اصلی توییت‌ها'), color='skyblue')
plt.plot(df_daily.index, df_daily['MA_7'], label=fix_farsi('میانگین متحرک ۷ روزه'), color='red')
plt.plot(df_daily.index, df_daily['MA_30'], label=fix_farsi('میانگین متحرک ۳۰ روزه'), color='blue')
plt.plot(df_daily.index, df_daily['MA_90'], label=fix_farsi('میانگین متحرک ۹۰ روزه'), color='green')

#  Set custom x-axis: from 2024-04 to 2025-02, monthly ticks
ax = plt.gca()
start_date = pd.to_datetime("2024-04-01")
end_date = pd.to_datetime("2025-02-28")
ax.set_xlim([start_date, end_date])
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

#  Labels and styles (Persian strings are reshaped, then drawn with the uploaded font)
plt.title(fix_farsi('میانگین متحرک تعداد توییت‌ها'), fontname=farsi_font)
plt.xlabel(fix_farsi('تاریخ'), fontname=farsi_font)
plt.ylabel(fix_farsi('تعداد توییت'), fontname=farsi_font)
plt.xticks(rotation=45, fontname=farsi_font)
plt.yticks(fontname=farsi_font)
plt.grid(True)
plt.legend(prop={'family': farsi_font})

#  Save and download
output_filename = "tweet_moving_average_custom_range.png"
plt.savefig(output_filename, dpi=300, bbox_inches='tight')
files.download(output_filename)


In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.font_manager as font_manager
import arabic_reshaper
from bidi.algorithm import get_display
from statsmodels.tsa.seasonal import seasonal_decompose  #  Import added
from google.colab import files

# Data file: prompt, then read the first uploaded entry as an Excel workbook
print(" Please upload the Excel file:")
uploaded = files.upload()
data_file = next(iter(uploaded))
df = pd.read_excel(data_file)

# Persian font file: prompt, register with matplotlib's font manager,
# then resolve the family name used by later text calls
print(" Please upload the Persian font file:")
font_uploaded = files.upload()
font_path = next(iter(font_uploaded))
font_manager.fontManager.addfont(font_path)
persian_props = font_manager.FontProperties(fname=font_path)
farsi_font = persian_props.get_name()

# Function to fix Persian text rendering
def fix_farsi(text):
    """Return *text* after reshaping and bidi reordering, ready for rendering."""
    shaped = arabic_reshaper.reshape(text)
    return get_display(shaped)

# Process date and create time series
df['datetime'] = pd.to_datetime(df.iloc[:, 2], format="%a %b %d %H:%M:%S %z %Y")
df['date'] = df['datetime'].dt.date
daily_counts = df['date'].value_counts().sort_index()
df_daily = pd.DataFrame({
    # Build a real DatetimeIndex up front: asfreq() and the string slice below
    # should not depend on pandas coercing an object index of datetime.date.
    'Date': pd.to_datetime(daily_counts.index),
    'TweetCount': daily_counts.values,
})
df_daily.set_index('Date', inplace=True)
df_daily = df_daily.asfreq('D')  # Complete daily time series; days without tweets become NaN

# Set custom date range: 2024-03-20 to 2025-02-18
df_daily = df_daily.loc["2024-03-20":"2025-02-18"]

# Fill the gaps once, outside the loop (the result does not depend on the window).
# limit_direction='both' also fills NaNs at the very start of the range, which
# appear when the slice begins on a day without tweets; seasonal_decompose
# rejects input that still contains missing values.
# NOTE(review): gaps are linearly interpolated, as before; if a missing day
# should count as zero tweets, use asfreq('D', fill_value=0) instead - confirm.
tweet_series = df_daily['TweetCount'].interpolate(limit_direction='both')

# Decomposition with 3 different windows (7, 30, 90 days)
windows = [7, 30, 90]
components = {}

for w in windows:
    decomposition = seasonal_decompose(tweet_series, model='additive', period=w)
    components[w] = decomposition

# Plot Figure 3-22: Overlayed components
# One row per component; each row overlays the three window lengths.
fig, axs = plt.subplots(3, 1, figsize=(15, 10), sharex=True)
colors = ['red', 'blue', 'green']
titles = ['trend', 'seasonal', 'residual']  #  display label only

for i, comp in enumerate(['trend', 'seasonal', 'resid']):
    for idx, w in enumerate(windows):
        # comp is the attribute name on the decomposition result; titles[i] is its display name
        series = getattr(components[w], comp)
        axs[i].plot(series.index, series.values, label=f"{titles[i].capitalize()} - {w} Days", color=colors[idx])
    axs[i].legend(loc='upper right')
    axs[i].set_title(fix_farsi(f"Component: {titles[i]}"))

axs[2].set_xlabel(fix_farsi("Date"), fontname=farsi_font)
plt.xticks(rotation=45, fontname=farsi_font)
plt.tight_layout()
plt.savefig("decomposition_overlay.png", dpi=300, bbox_inches='tight')
files.download("decomposition_overlay.png")

# Plot Figure 3-21: Component matrix
# Rows: original / trend / seasonal / residual; columns: one per window length.
fig2, axs = plt.subplots(4, 3, figsize=(15, 10), sharex='col')
for col, w in enumerate(windows):
    dec = components[w]
    # The "Original" row shows the counts before interpolation, so gaps stay visible
    axs[0, col].plot(df_daily.index, df_daily['TweetCount'], label="Original")
    axs[1, col].plot(dec.trend)
    axs[2, col].plot(dec.seasonal)
    axs[3, col].plot(dec.resid)

    axs[0, col].set_title(f"Original - {w} Days", fontname=farsi_font)
    axs[1, col].set_title(f"Trend - {w} Days", fontname=farsi_font)
    axs[2, col].set_title(f"Seasonal - {w} Days", fontname=farsi_font)
    axs[3, col].set_title(f"Residual - {w} Days", fontname=farsi_font)

for ax in axs.flatten():
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig("decomposition_matrix.png", dpi=300, bbox_inches='tight')
files.download("decomposition_matrix.png")
