# Semantic analysis of sexism in French Rap using word embeddings

Natural Language Processing for the Social Sciences

Matteo Larrode, MSc Social Data Science

## Setup

In [5]:
import json
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm

# The data come from the 2024 ACL-SRW paper "A Computational Analysis and Exploration of Linguistic Borrowings in French Rap Lyrics" by Lucas Zurbuchen and Rob Voigt.
# Paper: https://aclanthology.org/2024.acl-srw.27.pdf
# Source: https://github.com/ljz112/CLResearch/tree/main/dataEntries

In [6]:
# URLs for the data (raw JSON files from the ljz112/CLResearch GitHub repository)
old_songs_url = "https://raw.githubusercontent.com/ljz112/CLResearch/refs/heads/main/dataEntries/frenchDataOldSongs.json"
new_songs_url = "https://raw.githubusercontent.com/ljz112/CLResearch/refs/heads/main/dataEntries/frenchDataNew.json"

# requests applies no timeout by default, so a stalled connection would hang
# this cell forever; give up after this many seconds instead.
REQUEST_TIMEOUT_SECONDS = 30

# Download the data.
# raise_for_status() turns an HTTP error (404, 5xx, ...) into an immediate,
# explicit exception instead of letting the error page reach the JSON parser
# and fail there with a misleading decode error.
print("Downloading old songs data...")
old_songs_response = requests.get(old_songs_url, timeout=REQUEST_TIMEOUT_SECONDS)
old_songs_response.raise_for_status()
data_old = old_songs_response.json()
print("Downloading new songs data...")
new_songs_response = requests.get(new_songs_url, timeout=REQUEST_TIMEOUT_SECONDS)
new_songs_response.raise_for_status()
data_new = new_songs_response.json()
print("Data downloaded successfully.")

Downloading old songs data...
Downloading new songs data...
Data downloaded successfully.


In [None]:
# Create DataFrames from the 'allSongs' and 'allArtists' entries of each download
old_songs_df = pd.DataFrame(data_old['allSongs'])
old_artists_df = pd.DataFrame(data_old['allArtists'])

new_songs_df = pd.DataFrame(data_new['allSongs'])
new_artists_df = pd.DataFrame(data_new['allArtists'])

# Combine the DataFrames (ignore_index=True renumbers rows 0..n-1 so the
# old and new frames do not carry duplicate index labels)
all_songs_df = pd.concat([old_songs_df, new_songs_df], ignore_index=True)
all_artists_df = pd.concat([old_artists_df, new_artists_df], ignore_index=True)

# Save the DataFrames to CSV files.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; otherwise this cell fails on a fresh checkout.
Path("data").mkdir(parents=True, exist_ok=True)
all_songs_df.to_csv("data/french_rap_songs.csv", index=False)
all_artists_df.to_csv("data/french_rap_artists.csv", index=False)