<a href="https://colab.research.google.com/github/MonaFaghfouri/Descriptive-data-analysis/blob/main/%D8%A2Hashtags_Filtered_By_Location.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from collections import Counter
from google.colab import files
import re

# Word-frequency export: upload a spreadsheet, tokenize the text in its sixth
# column, count how often each token occurs, and download the counts as an
# Excel workbook.

# Upload the file; the returned mapping is keyed by uploaded file name.
uploaded = files.upload()

# Fail with a clear message instead of a bare IndexError when nothing was
# uploaded (for example, the upload dialog was cancelled).
if not uploaded:
    raise ValueError("No file was uploaded.")

# Only the first uploaded file is processed.
filename = next(iter(uploaded))

# Read the Excel file
df = pd.read_excel(filename)

# Ensure the sixth column exists
if df.shape[1] < 6:
    raise ValueError("Your file has less than 6 columns.")

# Extract the sixth column (index 5), skipping empty cells
text_column = df.iloc[:, 5].dropna().astype(str)

# A token is a run of word characters. A zero-width non-joiner (U+200C) that
# sits between two word characters is kept inside the token: it is not matched
# by \w, so a plain r'\w+' would cut a word written with it into two fragments
# and count each fragment separately.
# NOTE(review): assumes the column holds Persian text (the output headers are
# Persian), where U+200C is part of a word's spelling - confirm.
word_pattern = re.compile(r'\w+(?:\u200c\w+)*')

# Tokenize (lowercased, punctuation dropped) and count
all_words = []
for text in text_column:
    all_words.extend(word_pattern.findall(text.lower()))

word_counts = Counter(all_words)

# Convert to DataFrame, most frequent first. most_common() orders ties by
# first appearance, so the exported order is deterministic.
word_freq_df = pd.DataFrame(word_counts.most_common(), columns=['کلمه', 'تعداد'])

# Save to a new Excel file
output_filename = 'word_frequencies.xlsx'
word_freq_df.to_excel(output_filename, index=False)

# Provide the file for download
files.download(output_filename)


In [None]:
# Install necessary libraries for Persian text support
!pip install arabic_reshaper python-bidi -q

# Upload font and data files
from google.colab import files
print("Upload your FONT file (e.g., Vazir.ttf):")
uploaded_font = files.upload()

print("Upload your DATA file (e.g., Excel or CSV):")
uploaded_data = files.upload()

# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import arabic_reshaper
from bidi.algorithm import get_display
import io

# Load font
font_filename = list(uploaded_font.keys())[0]
font_prop = fm.FontProperties(fname=font_filename)

# Determine uploaded data file format and read it
data_filename = list(uploaded_data.keys())[0]
if data_filename.endswith('.xlsx'):
    df = pd.read_excel(io.BytesIO(uploaded_data[data_filename]))
elif data_filename.endswith('.csv'):
    df = pd.read_csv(io.BytesIO(uploaded_data[data_filename]))
else:
    raise ValueError("Unsupported file format. Please upload a .xlsx or .csv file.")

# Make sure column names are correct
df.columns = ["شهر", "فراوانی"]

# Reshape Persian text for correct display
df["شهر"] = df["شهر"].apply(lambda x: get_display(arabic_reshaper.reshape(str(x))))

# Plotting the bar chart
plt.figure(figsize=(10, 6))
bars = plt.bar(df["شهر"], df["فراوانی"], color='deeppink')  # Bright pink color

# Add labels and title
plt.xlabel(get_display(arabic_reshaper.reshape("شهر")), fontproperties=font_prop)
plt.ylabel(get_display(arabic_reshaper.reshape("فراوانی")), fontproperties=font_prop)
plt.title(get_display(arabic_reshaper.reshape("نمودار فراوانی بر اساس شهر")), fontproperties=font_prop)

# Set font for ticks
plt.xticks(rotation=45, ha='right', fontproperties=font_prop)
plt.yticks(fontproperties=font_prop)

# Annotate bars with values
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 50, f'{yval}',
             ha='center', va='bottom', fontproperties=font_prop, fontsize=10)

# Save the plot as PNG
plt.tight_layout()
plt.savefig("bar_chart.png", dpi=300)
plt.show()

# Auto-download the saved image
files.download("bar_chart.png")
