## Performing a comparative gender analysis and generating word clouds from the lyrics of female and male rappers in Senegal.

### Step 1: Import required libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import requests
import io

### Step 2: Define a function to load a CSV file from GitHub

In [None]:
def load_lyrics_from_github(raw_url, timeout=30):
    """Download a CSV file from a raw GitHub URL and parse it into a DataFrame.

    Args:
        raw_url: Direct ("raw") URL of the CSV file to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``pandas.DataFrame`` parsed from the downloaded CSV text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or a timeout.
    """
    response = requests.get(raw_url, timeout=timeout)
    # Fail loudly on 404/500 instead of parsing an HTML error page as CSV.
    response.raise_for_status()
    # Decode explicitly as UTF-8 rather than relying on a guessed charset.
    csv_text = response.content.decode('utf-8')
    return pd.read_csv(io.StringIO(csv_text))

### Step 3: Load lyrics data for female and male rappers

In [None]:
# Raw GitHub URLs of the two lyrics CSV files; replace them to analyse other datasets.

female_csv_url = "https://raw.githubusercontent.com/AWJP/AWJP-data/refs/heads/main/Social%20Issues%20%26%20Human%20Rights/Lyrics%20of%20popular%20Senegal%20female%20rap%20songs%202024.csv"
male_csv_url = "https://raw.githubusercontent.com/AWJP/AWJP-data/refs/heads/main/Social%20Issues%20%26%20Human%20Rights/Lyrics%20of%20popular%20Senegal%20male%20rap%20songs%202024.csv"

# Download each file and parse it into its own DataFrame.
female_df = load_lyrics_from_github(female_csv_url)
male_df = load_lyrics_from_github(male_csv_url)

### Step 4: Preview the structure

In [None]:
# Tuple holding the first rows of each DataFrame; as the last expression of a
# notebook cell it is displayed, which lets you check the column names
# (the later steps read a 'text' column).
female_df.head(), male_df.head()

### Step 5: Combine all lyrics into one string per group

In [None]:
# Drop rows without lyrics, then join the remaining entries of the 'text'
# column into one space-separated string per group.
female_lyrics = female_df['text'].dropna()
male_lyrics = male_df['text'].dropna()

female_text = female_lyrics.str.cat(sep=' ')
male_text = male_lyrics.str.cat(sep=' ')

### Step 6: Create a function to generate a word cloud

In [None]:
def generate_wordcloud(text, title, stopwords=None):
    """Render a word cloud for ``text`` and display it with matplotlib.

    Args:
        text: The lyrics to visualise, as a single string.
        title: Heading drawn above the word cloud image.
        stopwords: Optional collection of words to leave out of the cloud.
            ``None`` (the default) keeps the WordCloud library's built-in
            stopword list; pass a custom set to filter filler words in the
            language of the lyrics.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace, since
            there is nothing to plot.
    """
    # Check up front so the error names the offending plot instead of
    # surfacing from inside the WordCloud library.
    if not text or not text.strip():
        raise ValueError(f"No text available to build the word cloud: {title!r}")

    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        stopwords=stopwords,
    ).generate(text)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # The axes carry no meaning for an image of words.
    plt.title(title, fontsize=16)
    plt.show()

### Step 7: Generate word clouds for both groups

In [None]:
# Draw one word cloud per group, female lyrics first and male lyrics second.
for group_text, group_title in (
    (female_text, "Female Rappers' Lyrics Word Cloud"),
    (male_text, "Male Rappers' Lyrics Word Cloud"),
):
    generate_wordcloud(group_text, group_title)

### Optional Step 8: Compare top 20 most common words (basic)

In [None]:
from collections import Counter
import re

def get_top_words(text, n=20):
    """Return the ``n`` most frequent words in ``text``.

    The text is lower-cased and split into runs of word characters, so
    punctuation (including apostrophes) separates words.

    Args:
        text: The string to analyse.
        n: How many of the most common words to return.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least common.
    """
    tally = Counter()
    for match in re.finditer(r'\b\w+\b', text.lower()):
        tally[match.group()] += 1
    return tally.most_common(n)

# Print each group's heading followed by its list of (word, count) pairs.
print("Top words in Female lyrics:")
female_top_words = get_top_words(female_text)
print(female_top_words)

print("\nTop words in Male lyrics:")
male_top_words = get_top_words(male_text)
print(male_top_words)