In [4]:
import csv
import pandas as pd
import re

In [99]:
def cleaning_cells(file_csv):
    df = pd.read_csv(file_csv)
    # remove rows with empty 
    df = df.dropna() 
    # remove rows with 'outstanding' in cat
    df['category'] = df['category'].str.replace('outstanding', '', case=False)
    # remove ;
    df['staff'] = df['staff'].str.replace(';', '')
    df.to_csv(file_csv, index=False)
    
def print_uniques(df, column):
    uniques = df[column].value_counts().sort_index()
    print(f"unique '{column}' values:")
    for val in uniques.index:
        print(val)
        
def comma_remove(df):
    for column in df.columns:
        df[column] = df[column].apply(lambda x: x.split(',')[0] if isinstance(x, str) else x)
    return df

In [100]:
file_csv = '../dataset/the_emmy_awards.csv'
cleaning_cells(file_csv)
df = pd.read_csv(file_csv)
df = df.drop(columns=['id', 'company', 'producer'])

In [101]:
print_uniques(df, 'category')

unique 'category' values:
 ACHIEVEMENT IN MAIN TITLE THEME MUSIC
 ACHIEVEMENT IN MAKEUP
 ACHIEVEMENT IN MUSIC COMPOSITION for a limited series or a special (dramatic underscore)
 ACHIEVEMENT IN MUSIC DIRECTION
 ACHIEVEMENT IN NON-FICTION PROGRAMMING - PICTURE EDITING
 ACHIEVEMENT IN TECHNICAL DIRECTION AND electronic camerawork
 ANIMATED PROGRAM (FOR PROGRAMMING ONE HOUR OR LESS)
 ANIMATED PROGRAM (FOR PROGRAMMING one hour or less.)
 ART DIRECTION FOR A COMEDY-VARIETY or music series
 ART DIRECTION FOR A SERIES
 ART DIRECTION FOR A VARIETY OR MUSIC PROGRAM
 Actor In A Short Form Comedy Or Drama Series
 Actor in a Short Form Comedy or Drama Series
 Actress In A Short Form Comedy Or Drama Series
 Actress in a Short Form Comedy or Drama Series
 Animated Program
 Animated Program (for programming less than one hour)
 Animated Program (for programming one hour or more)
 Art Direction For A Contemporary Or Fantasy Series (Single-Camera)
 Art Direction For A Contemporary Program (Half-Hour Or

Removal of first useless lines

In [102]:
removes = [
    'costume', 'Costumes', 'creative', 'Hairstyling', 'Host', 'Individual', 'Innovation', 'Informational',
    'LIGHTING', 'MUSIC', 'CAMERA', 'Make up', 'Makeup', 'Make-up', 'Design', 'Editing', 'Reality', 'SOUND',
    'Short Form', 'Short-Format', 'Special Class', 'Stunt', 'Technical', 'animated', 'art direction', 'casting',
    'choreography', 'cinematography', 'commercial', 'commercials', 'composing', 'costume', 'costumes', 'Interactive',
    'Children\'s Program', 'Documentary', 'Competition', 'Visual Effects', 'Drama Series', 'Limited Series',
    'Television Movie', 'Variety Talk Series', 'Variety Sketch Series', 'Variety Special', 'Short Form Comedy or Drama Series'
]

categories_to_remove = ['Comedy Series', 'Drama Series',' Limited Series', ' Made For Television Movie', 'Miniseries', 'Miniseries or Movie', ' Nonfiction Program (Alternative)', ' Nonfiction Series', ' NON-FICTION SERIES - area award', ' Nonfiction Series (Traditional)', ' Nonfiction Series', ' Television Movie', ' Variety Series', ' Variety Sketch Series']
filtered_df = df[~df['category'].isin(categories_to_remove)]

emmy_prizes = df.copy()
for word in removes:
    emmy_prizes = emmy_prizes[~emmy_prizes['category'].str.contains(word, case=False)]

emmy_prizes.to_csv('../dataset/the_emmy_awards.csv', index=False)

In [103]:
print_uniques(df, 'category')

unique 'category' values:
 ACHIEVEMENT IN MAIN TITLE THEME MUSIC
 ACHIEVEMENT IN MAKEUP
 ACHIEVEMENT IN MUSIC COMPOSITION for a limited series or a special (dramatic underscore)
 ACHIEVEMENT IN MUSIC DIRECTION
 ACHIEVEMENT IN NON-FICTION PROGRAMMING - PICTURE EDITING
 ACHIEVEMENT IN TECHNICAL DIRECTION AND electronic camerawork
 ANIMATED PROGRAM (FOR PROGRAMMING ONE HOUR OR LESS)
 ANIMATED PROGRAM (FOR PROGRAMMING one hour or less.)
 ART DIRECTION FOR A COMEDY-VARIETY or music series
 ART DIRECTION FOR A SERIES
 ART DIRECTION FOR A VARIETY OR MUSIC PROGRAM
 Actor In A Short Form Comedy Or Drama Series
 Actor in a Short Form Comedy or Drama Series
 Actress In A Short Form Comedy Or Drama Series
 Actress in a Short Form Comedy or Drama Series
 Animated Program
 Animated Program (for programming less than one hour)
 Animated Program (for programming one hour or more)
 Art Direction For A Contemporary Or Fantasy Series (Single-Camera)
 Art Direction For A Contemporary Program (Half-Hour Or

In [104]:
colonne = ['staff']
emmy_prizes = comma_remove(emmy_prizes)
emmy_prizes.to_csv('../dataset/the_emmy_awards.csv', index=False)

In [118]:
import pandas as pd

# Carica il dataset dei film
df_movies = pd.read_csv('../dataset/COPIA_rounded_updated_films.csv')
df_movies['dir_emmy_nom'] = 0
df_movies['dir_emmy_won'] = 0
df_movies['writer_emmy_won'] = 0
df_movies['writer_emmy_nom'] = 0
df_movies['act_emmy_nom'] = 0
df_movies['act_emmy_won'] = 0

# Carica il dataset dei premi Emmy
df_emmies = pd.read_csv('../dataset/the_emmy_awards.csv')

# Funzione per normalizzare e dividere i nomi
def normalize_split_names(names):
    return [name.strip() for name in names.split(',')]

# Iterare su ogni riga del dataset degli Emmy
for index, row in df_emmies.iterrows():
    staff_members = normalize_split_names(row['staff'])
    emmy_year = row['year']
    is_winner = row['win']

    # Controllare corrispondenze nel dataset movies
    for staff in staff_members:
        for column in ['actors', 'writer', 'director']:
            matches = df_movies[column].apply(lambda x: staff in str(x).split(', '))
            valid_years = df_movies['year'] >= (emmy_year - 1)
            matches = matches & valid_years

            # Aggiorna i contatori per direttori, scrittori e attori
            if column == 'director':
                df_movies.loc[matches, 'dir_emmy_nom'] += 1
                if is_winner:
                    df_movies.loc[matches, 'dir_emmy_won'] += 1
            elif column == 'writer':
                df_movies.loc[matches, 'writer_emmy_nom'] += 1
                if is_winner:
                    df_movies.loc[matches, 'writer_emmy_won'] += 1
            elif column == 'actors':
                df_movies.loc[matches, 'act_emmy_nom'] += 1
                if is_winner:
                    df_movies.loc[matches, 'act_emmy_won'] += 1

# Salvare il dataset aggiornato
df_movies.to_csv('movies_with_emmies.csv', index=False)


# OSCAR

In [124]:
data = pd.read_csv('../dataset/the_oscar_award.csv')

# Extract unique categories
unique_categories = data['category'].unique()

# Save the unique categories into a new file
unique_categories_df = pd.DataFrame(unique_categories, columns=['category'])
# unique_categories_df.to_csv('/mnt/data/oscars.csv', index=False)

#unique_categories

categories_to_remove = [
    "UNIQUE AND ARTISTIC PICTURE", "SPECIAL AWARD", "OUTSTANDING PRODUCTION",
    "SOUND RECORDING", "SHORT SUBJECT (Cartoon)", "SHORT SUBJECT (Comedy)",
    "SHORT SUBJECT (Novelty)", "ASSISTANT DIRECTOR", "FILM EDITING", "MUSIC (Scoring)",
    "MUSIC (Song)", "DANCE DIRECTION", "SHORT SUBJECT (Color)", "SHORT SUBJECT (One-reel)",
    "SHORT SUBJECT (Two-reel)", "IRVING G. THALBERG MEMORIAL AWARD", "CINEMATOGRAPHY (Black-and-White)",
    "CINEMATOGRAPHY (Color)", "SPECIAL EFFECTS", "ART DIRECTION (Black-and-White)",
    "ART DIRECTION (Color)", "DOCUMENTARY (Short Subject)", "MUSIC (Music Score of a Dramatic Picture)",
    "MUSIC (Scoring of a Musical Picture)", "OUTSTANDING MOTION PICTURE", "DOCUMENTARY",
    "MUSIC (Music Score of a Dramatic or Comedy Picture)", "DOCUMENTARY (Feature)",
    "BEST MOTION PICTURE", "COSTUME DESIGN (Black-and-White)", "COSTUME DESIGN (Color)",
    "SPECIAL FOREIGN LANGUAGE FILM AWARD", "HONORARY FOREIGN LANGUAGE FILM AWARD",
    "HONORARY AWARD", "FOREIGN LANGUAGE FILM", "JEAN HERSHOLT HUMANITARIAN AWARD",
    "COSTUME DESIGN", "SHORT SUBJECT (Live Action)", "SOUND", "MUSIC (Music Score--substantially original)",
    "MUSIC (Scoring of Music--adaptation or treatment)", "BEST PICTURE", "SOUND EFFECTS",
    "SPECIAL VISUAL EFFECTS", "MUSIC (Original Music Score)", "MUSIC (Original Score--for a motion picture [not a musical])",
    "MUSIC (Score of a Musical Picture--original or adaptation)", "MUSIC (Song--Original for the Picture)",
    "MUSIC (Original Song Score)", "MUSIC (Original Dramatic Score)", "MUSIC (Scoring: Adaptation and Original Song Score)",
    "SHORT SUBJECT (Animated)", "SPECIAL ACHIEVEMENT AWARD (Visual Effects)",
    "MUSIC (Scoring: Original Song Score and Adaptation -or- Scoring: Adaptation)", "SHORT FILM (Animated)",
    "SHORT FILM (Live Action)", "SPECIAL ACHIEVEMENT AWARD (Sound Effects)", "MUSIC (Original Song Score and Its Adaptation or Adaptation Score)", "VISUAL EFFECTS", "SPECIAL ACHIEVEMENT AWARD", "SPECIAL ACHIEVEMENT AWARD (Sound Effects Editing)",
    "MUSIC (Adaptation Score)", "MUSIC (Original Song Score and Its Adaptation -or- Adaptation Score)",
    "SPECIAL ACHIEVEMENT AWARD (Sound Editing)", "SHORT FILM (Dramatic Live Action)", "MAKEUP",
    "SOUND EFFECTS EDITING", "MUSIC (Original Song Score or Adaptation Score)",
    "MUSIC (Original Musical or Comedy Score)", "SOUND EDITING", "ANIMATED FEATURE FILM",
    "SOUND MIXING", "MAKEUP AND HAIRSTYLING", "PRODUCTION DESIGN", "INTERNATIONAL FEATURE FILM",
    "DOCUMENTARY FEATURE FILM", "DOCUMENTARY SHORT FILM", "GORDON E. SAWYER AWARD",
    "AWARD OF COMMENDATION", "OUTSTANDING PICTURE", "MUSIC (Original Score)", "MUSIC (Original Song)", "CINEMATOGRAPHY", "ART DIRECTION", "ENGINEERING EFFECTS"
]

filtered_df = data[~data['category'].isin(categories_to_remove)]

# Salva il nuovo DataFrame in un file CSV
filtered_df.to_csv('../dataset/the_oscar_award.csv', index=False)


In [126]:
prefixes_to_remove = [
    'Screenplay by',
    'Story by',
    'Story and Screenplay by',
    'Written by',
    'Original story by'
]

# Compila la regex che corrisponde a qualsiasi dei prefissi definiti
# Aggiungi 'r' per denotare una raw string
regex_pattern = r'^(?:' + '|'.join(re.escape(prefix) for prefix in prefixes_to_remove) + r')\s'

filtered_df = filtered_df.dropna() 

# Rimuovi i prefissi dalla colonna 'name'
filtered_df['name'] = filtered_df['name'].replace(regex_pattern, '', regex=True)
# Salva il DataFrame risultante
filtered_df.to_csv('../dataset/the_oscar_award', index=False)

In [129]:
merged_imdb = pd.read_csv('../dataset/movies_with_emmies.csv')
oscars = pd.read_csv('../dataset/the_oscar_award.csv')

# Preparazione del DataFrame
merged_imdb['dir_oscar_nomination'] = 0
merged_imdb['dir_oscar_won'] = 0
merged_imdb['writer_oscar_nomination'] = 0
merged_imdb['writer_oscar_won'] = 0
merged_imdb['cast_oscar_nomination'] = 0
merged_imdb['cast_oscar_won'] = 0

# Assicurati che le colonne non abbiano valori NaN
merged_imdb['actors'] = merged_imdb['actors'].fillna('')
merged_imdb['director'] = merged_imdb['director'].fillna('')
merged_imdb['writer'] = merged_imdb['writer'].fillna('')

# Funzione per normalizzare i nomi
def normalize_name(name):
    name = name.replace('&', ',').replace(' Jr', ' Jr.').replace(' Senior', ' Senior').split(',')
    name = [n.strip() for n in name]
    return name

# Funzione per gestire la verifica di nomination e vittorie Oscar
def update_oscars(row):
    raw_names = normalize_name(row['name'])
    year_oscars = int(row['year_film'])
    is_winner = row['winner'] == True
    
    # Crea una lista di tutti i possibili nomi invertiti
    names = []
    for name in raw_names:
        parts = name.split()
        if len(parts) > 1:
            reversed_name = ' '.join(reversed(parts))
            names.append(name)
            names.append(reversed_name)
        else:
            names.append(name)
    
    for name in names:
        # Verifica per la categoria DIRECTOR
        if 'DIRECTING' in row['category']:
            directors = merged_imdb['director'].apply(lambda x: name in x)
            year_condition = merged_imdb['year'] >= year_oscars
            indices = merged_imdb[directors & year_condition].index
            merged_imdb.loc[indices, 'dir_oscar_nomination'] += 1
            if is_winner:
                merged_imdb.loc[indices, 'dir_oscar_won'] += 1

        # Verifica per la categoria WRITING
        elif 'WRITING' in row['category']:
            writers = merged_imdb['writer'].apply(lambda x: name in x)
            year_condition = merged_imdb['year'] >= year_oscars
            indices = merged_imdb[writers & year_condition].index
            merged_imdb.loc[indices, 'writer_oscar_nomination'] += 1
            if is_winner:
                merged_imdb.loc[indices, 'writer_oscar_won'] += 1

        # Verifica per la categoria ACTOR o ACTRESS
        elif 'ACTOR' in row['category'] or 'ACTRESS' in row['category']:
            actors_condition = merged_imdb['actors'].apply(lambda x: name in x)
            year_condition = merged_imdb['year'] >= year_oscars
            indices = merged_imdb[actors_condition & year_condition].index
            merged_imdb.loc[indices, 'cast_oscar_nomination'] += 1
            if is_winner:
                merged_imdb.loc[indices, 'cast_oscar_won'] += 1

# Applica la funzione a tutte le righe di oscars
oscars.apply(update_oscars, axis=1)

# Salva il nuovo DataFrame
merged_imdb.to_csv('../dataset/movies_with_oscar.csv', index=False)


# Golden Globe

In [130]:
categories_to_remove = [
"Picture",
"Promoting International Understanding",
"Special Achievement Award",
"Best Screenplay - Motion Picture",
"Best Original Score - Motion Picture",
"New Star Of The Year - Actress",
"New Star Of The Year - Actor",
"Juvenile Performance",
"Cinematography",
"Foreign Film - English Language",
"Best Motion Picture - Foreign Language",
"Outstanding Use Of Color",
"New Star Of The Year",
"Henrietta Award (World Film Favorites)",
"Cinematography - Color",
"Cinematography - Black And White",
"Best Motion Picture - Drama",
"Best Motion Picture - Musical or Comedy",
"Henrietta Award (World Film Favorite)",
"Cecil B. deMille Award",
"Documentary",
"Television Achievement",
"Hollywood Citizenship Award",
"Foreign Film - Foreign Language",
"Picture - Musical",
"Picture - Comedy",
"Samuel Goldwyn International Award",
"Famous Silent Filmstars",
"Best Original Song - Motion Picture",
"Television Series",
"Best Television Series - Drama",
"Television Program",
"Television Producer/Director",
"Television Series - Comedy",
"International News Coverage",
"Television Series - Variety",
"Best Television Series - Musical or Comedy",
"Television Movie",
"Television Special - Variety Or Musical",
"Best Television Limited Series or Motion Picture Made for Television",
"Best Motion Picture - Animated",
"Carol Burnett Award"
]
gg_df = pd.read_csv('../dataset/golden_globe_awards.csv')
filtered_df = gg_df[~gg_df['category'].isin(categories_to_remove)]

filtered_df.to_csv('../dataset/golden_globe_awards.csv', index=False)

In [135]:
merged_imdb = pd.read_csv('../dataset/movies_with_oscar.csv')
oscars = pd.read_csv('../dataset/golden_globe_awards.csv')

# Preparazione del DataFrame
merged_imdb['dir_globe_nomination'] = 0
merged_imdb['dir_globe_won'] = 0
merged_imdb['cast_globe_nomination'] = 0
merged_imdb['cast_globe_won'] = 0

# Assicurati che le colonne non abbiano valori NaN
merged_imdb['actors'] = merged_imdb['actors'].fillna('')
merged_imdb['director'] = merged_imdb['director'].fillna('')

# Funzione per normalizzare i nomi
def normalize_name(name):
    name = name.replace('&', ',').split(',')
    name = [n.strip() for n in name]
    return name

# Funzione per gestire la verifica di nomination e vittorie Oscar
def update_globe(row):
    raw_names = normalize_name(row['nominee'])
    year_oscars = int(row['year_award'])
    is_winner = row['win'] == True
    
    # Crea una lista di tutti i possibili nomi invertiti
    names = []
    for name in raw_names:
        parts = name.split()
        if len(parts) > 1:
            reversed_name = ' '.join(reversed(parts))
            names.append(name)
            names.append(reversed_name)
        else:
            names.append(name)
    
    for name in names:
        # Verifica per la categoria DIRECTOR
        if 'Director' in row['category']:
            directors = merged_imdb['director'].apply(lambda x: name in x)
            year_condition = merged_imdb['year'] >= year_oscars
            indices = merged_imdb[directors & year_condition].index
            merged_imdb.loc[indices, 'dir_globe_nomination'] += 1
            if is_winner:
                merged_imdb.loc[indices, 'dir_globe_won'] += 1
        # Verifica per la categoria ACTOR o ACTRESS
        elif 'actor' in row['category'] or 'Actor' in row['category'] or 'actress' in row['category'] or 'Actress' in row['category']:
            actors_condition = merged_imdb['actors'].apply(lambda x: name in x)
            year_condition = merged_imdb['year'] >= year_oscars
            indices = merged_imdb[actors_condition & year_condition].index
            merged_imdb.loc[indices, 'cast_globe_nomination'] += 1
            if is_winner:
                merged_imdb.loc[indices, 'cast_globe_won'] += 1

# Applica la funzione a tutte le righe di oscars
oscars.apply(update_globe, axis=1)

# Salva il nuovo DataFrame
merged_imdb.to_csv('../dataset/movies_with_gg.csv', index=False)

# BAFTA

In [5]:
allowed_categories = [
    "Film | Achievement in Direction in 1987",
    "Film | Achievement in Direction in 1988",
    "Film | Achievement in Direction in 1989",
    "Film | Achievement in Direction in 1990",
    "Film | Achievement in Direction in 1991",
    "Film | Actor in 1969",
    "Film | Actor in 1970",
    "Film | Actor in 1971",
    "Film | Actor in 1972",
    "Film | Actor in 1973",
    "Film | Actor in 1974",
    "Film | Actor in 1975",
    "Film | Actor in 1976",
    "Film | Actor in 1977",
    "Film | Actor in 1978",
    "Film | Actor in 1979",
    "Film | Actor in 1980",
    "Film | Actor in 1981",
    "Film | Actor in 1982",
    "Film | Actor in 1983",
    "Film | Actor in 1984",
    "Film | Actor in 1985",
    "Film | Actor in a Leading Role in 1986",
    "Film | Actor in a Leading Role in 1987",
    "Film | Actor in a Leading Role in 1988",
    "Film | Actor in a Leading Role in 1989",
    "Film | Actor in a Leading Role in 1990",
    "Film | Actor in a Leading Role in 1991",
    "Film | Actor in a Leading Role in 1992",
    "Film | Actor in a Leading Role in 1993",
    "Film | Actor in a Leading Role in 1994",
    "Film | Actor in a Leading Role in 1995",
    "Film | Actor in a Leading Role in 1996",
    "Film | Actor in a Leading Role in 2001",
    "Film | Actor in a Supporting Role in 1986",
    "Film | Actor in a Supporting Role in 1987",
    "Film | Actor in a Supporting Role in 1988",
    "Film | Actor in a Supporting Role in 1989",
    "Film | Actor in a Supporting Role in 1990",
    "Film | Actor in a Supporting Role in 1991",
    "Film | Actor in a Supporting Role in 1992",
    "Film | Actor in a Supporting Role in 1993",
    "Film | Actor in a Supporting Role in 1994",
    "Film | Actor in a Supporting Role in 1995",
    "Film | Actor in a Supporting Role in 1996",
    "Film | Actor in a Supporting Role in 2001",
    "Film | Actress in 1969",
    "Film | Actress in 1970",
    "Film | Actress in 1971",
    "Film | Actress in 1972",
    "Film | Actress in 1973",
    "Film | Actress in 1974",
    "Film | Actress in 1975",
    "Film | Actress in 1976",
    "Film | Actress in 1977",
    "Film | Actress in 1978",
    "Film | Actress in 1979",
    "Film | Actress in 1980",
    "Film | Actress in 1981",
    "Film | Actress in 1982",
    "Film | Actress in 1983",
    "Film | Actress in 1984",
    "Film | Actress in 1985",
    "Film | Actress in a Leading Role in 1986",
    "Film | Actress in a Leading Role in 1987",
    "Film | Actress in a Leading Role in 1988",
    "Film | Actress in a Leading Role in 1989",
    "Film | Actress in a Leading Role in 1990",
    "Film | Actress in a Leading Role in 1991",
    "Film | Actress in a Leading Role in 1992",
    "Film | Actress in a Leading Role in 1993",
    "Film | Actress in a Leading Role in 1994",
    "Film | Actress in a Leading Role in 1995",
    "Film | Actress in a Leading Role in 1996",
    "Film | Actress in a Leading Role in 2001",
    "Film | Actress in a Supporting Role in 1986",
    "Film | Actress in a Supporting Role in 1987",
    "Film | Actress in a Supporting Role in 1988",
    "Film | Actress in a Supporting Role in 1989",
    "Film | Actress in a Supporting Role in 1990",
    "Film | Actress in a Supporting Role in 1991",
    "Film | Actress in a Supporting Role in 1992",
    "Film | Actress in a Supporting Role in 1993",
    "Film | Actress in a Supporting Role in 1994",
    "Film | Actress in a Supporting Role in 1995",
    "Film | Actress in a Supporting Role in 1996",
    "Film | Actress in a Supporting Role in 2001",
    "Film | Adapted Screenplay in 1984",
    "Film | Adapted Screenplay in 1985",
    "Film | Adapted Screenplay in 1986",
    "Film | Adapted Screenplay in 1987",
    "Film | Adapted Screenplay in 1988",
    "Film | Adapted Screenplay in 1989",
    "Film | Adapted Screenplay in 1990",
    "Film | Adapted Screenplay in 1991",
    "Film | Adapted Screenplay in 1992",
    "Film | Adapted Screenplay in 1993",
    "Film | Adapted Screenplay in 1994",
    "Film | Adapted Screenplay in 1995",
    "Film | Adapted Screenplay in 1996",
    "Film | Adapted Screenplay in 2001",
    "Film | Adapted Screenplay in 2016",
    "Film | Adapted Screenplay in 2017",
    "Film | Adapted Screenplay in 2018",
    "Film | Adapted Screenplay in 2019",
    "Film | Adapted Screenplay in 2020",
    "Film | Best Film in 2018",
    "Film | Best Film in 2019",
    "Film | Best Film in 2020",
    "Film | British Actor in 1953",
    "Film | British Actor in 1954",
    "Film | British Actor in 1955",
    "Film | British Actor in 1956",
    "Film | British Actor in 1957",
    "Film | British Actor in 1958",
    "Film | British Actor in 1959",
    "Film | British Actor in 1960",
    "Film | British Actor in 1961",
    "Film | British Actor in 1962",
    "Film | British Actor in 1963",
    "Film | British Actor in 1964",
    "Film | British Actor in 1965",
    "Film | British Actor in 1966",
    "Film | British Actor in 1967",
    "Film | British Actor in 1968",
    "Film | British Actress in 1953",
    "Film | British Actress in 1954",
    "Film | British Actress in 1955",
    "Film | British Actress in 1956",
    "Film | British Actress in 1957",
    "Film | British Actress in 1958",
    "Film | British Actress in 1959",
    "Film | British Actress in 1960",
    "Film | British Actress in 1961",
    "Film | British Actress in 1962",
    "Film | British Actress in 1963",
    "Film | British Actress in 1964",
    "Film | British Actress in 1965",
    "Film | British Actress in 1966",
    "Film | British Actress in 1967",
    "Film | British Actress in 1968",
    "Film | British Film in 1960",
    "Film | British Film in 1961",
    "Film | British Film in 1966",
    "Film | British Film in 1967",
    "Film | British Film in 1968",
    "Film | British Screenplay in 1955",
    "Film | British Screenplay in 1956",
    "Film | British Screenplay in 1957",
    "Film | British Screenplay in 1958",
    "Film | British Screenplay in 1959",
    "Film | British Screenplay in 1960",
    "Film | British Screenplay in 1961",
    "Film | British Screenplay in 1962",
    "Film | British Screenplay in 1963",
    "Film | British Screenplay in 1964",
    "Film | British Screenplay in 1965",
    "Film | British Screenplay in 1966",
    "Film | British Screenplay in 1967",
    "Film | British Screenplay in 1968",
    "Film | British Short Animation in 2016",
    "Film | British Short Animation in 2017",
    "Film | British Short Animation in 2018",
    "Film | British Short Animation in 2019",
    "Film | British Short Animation in 2020",
    "Film | British Short Film in 2016",
    "Film | British Short Film in 2017",
    "Film | British Short Film in 2018",
    "Film | British Short Film in 2019",
    "Film | British Short Film in 2020",
    "Film | Direction in 1969",
    "Film | Direction in 1970",
    "Film | Direction in 1971",
    "Film | Direction in 1972",
    "Film | Direction in 1973",
    "Film | Direction in 1974",
    "Film | Direction in 1975",
    "Film | Direction in 1976",
    "Film | Direction in 1977",
    "Film | Direction in 1978",
    "Film | Direction in 1979",
    "Film | Direction in 1980",
    "Film | Direction in 1981",
    "Film | Direction in 1982",
    "Film | Direction in 1983",
    "Film | Direction in 1984",
    "Film | Direction in 1985",
    "Film | Director in 2016",
    "Film | Director in 2017",
    "Film | Director in 2018",
    "Film | Director in 2019",
    "Film | Director in 2020",
    "Film | Documentary in 2016",
    "Film | Documentary in 2017",
    "Film | Documentary in 2018",
    "Film | Documentary in 2019",
    "Film | Documentary in 2020",
    "Film | Editing - for best film editing of a British Film in 1967",
    "Film | Editing in 1969",
    "Film | Editing in 1970",
    "Film | Editing in 1981",
    "Film | Editing in 1982",
    "Film | Editing in 1984",
    "Film | Editing in 1985",
    "Film | Editing in 1986",
    "Film | Editing in 1987",
    "Film | Editing in 1988",
    "Film | Editing in 1989",
    "Film | Editing in 1990",
    "Film | Editing in 1991",
    "Film | Editing in 1992",
    "Film | Editing in 1993",
    "Film | Editing in 1994",
    "Film | Editing in 1995",
    "Film | Editing in 1996",
    "Film | Editing in 2001",
    "Film | Editing in 2016",
    "Film | Editing in 2017",
    "Film | Editing in 2018",
    "Film | Editing in 2019",
    "Film | Editing in 2020",
    "Film | Fictional Film in 1978",
    "Film | Film From Any Source in 1960",
    "Film | Film From Any Source in 1961",
    "Film | Film Not In The English Language in 2020",
    "Film | Film Not in the English Language in 1989",
    "Film | Film Not in the English Language in 1990",
    "Film | Film Not in the English Language in 1991",
    "Film | Film Not in the English Language in 1992",
    "Film | Film Not in the English Language in 1993",
    "Film | Film Not in the English Language in 1994",
    "Film | Film Not in the English Language in 1995",
    "Film | Film Not in the English Language in 1996",
    "Film | Film Not in the English Language in 2001",
    "Film | Film Not in the English Language in 2016",
    "Film | Film Not in the English Language in 2017",
    "Film | Film Not in the English Language in 2018",
    "Film | Film Not in the English Language in 2019",
    "Film | Foreign Actor in 1953",
    "Film | Foreign Actor in 1954",
    "Film | Foreign Actor in 1955",
    "Film | Foreign Actor in 1956",
    "Film | Foreign Actor in 1957",
    "Film | Foreign Actor in 1958",
    "Film | Foreign Actor in 1959",
    "Film | Foreign Actor in 1960",
    "Film | Foreign Actor in 1961",
    "Film | Foreign Actor in 1962",
    "Film | Foreign Actor in 1963",
    "Film | Foreign Actor in 1964",
    "Film | Foreign Actor in 1965",
    "Film | Foreign Actor in 1966",
    "Film | Foreign Actor in 1967",
    "Film | Foreign Actor in 1968",
    "Film | Foreign Actress in 1953",
    "Film | Foreign Actress in 1954",
    "Film | Foreign Actress in 1955",
    "Film | Foreign Actress in 1956",
    "Film | Foreign Actress in 1957",
    "Film | Foreign Actress in 1958",
    "Film | Foreign Actress in 1959",
    "Film | Foreign Actress in 1960",
    "Film | Foreign Actress in 1961",
    "Film | Foreign Actress in 1962",
    "Film | Foreign Actress in 1963",
    "Film | Foreign Actress in 1964",
    "Film | Foreign Actress in 1965",
    "Film | Foreign Actress in 1966",
    "Film | Foreign Actress in 1967",
    "Film | Foreign Actress in 1968",
    "Film | Foreign Language Film in 1983",
    "Film | Foreign Language Film in 1984",
    "Film | Foreign Language Film in 1985",
    "Film | Foreign Language Film in 1986",
    "Film | Foreign Language Film in 1987",
    "Film | Foreign Language Film in 1988",
    "Film | John Grierson Award (Short Film) in 1975",
    "Film | John Grierson Award (Short Film) in 1976",
    "Film | Leading Actor in 2016",
    "Film | Leading Actor in 2017",
    "Film | Leading Actor in 2018",
    "Film | Leading Actor in 2019",
    "Film | Leading Actor in 2020",
    "Film | Leading Actress in 2016",
    "Film | Leading Actress in 2017",
    "Film | Leading Actress in 2018",
    "Film | Leading Actress in 2019",
    "Film | Leading Actress in 2020",
    "Film | Most Promising Newcomer To Film in 1953",
    "Film | Most Promising Newcomer To Film in 1954",
    "Film | Most Promising Newcomer To Film in 1955",
    "Film | Most Promising Newcomer To Film in 1956",
    "Film | Most Promising Newcomer To Film in 1957",
    "Film | Most Promising Newcomer To Film in 1958",
    "Film | Most Promising Newcomer To Film in 1959",
    "Film | Most Promising Newcomer To Film in 1960",
    "Film | Most Promising Newcomer To Film in 1984",
    "Film | Most Promising Newcomer To Film in 1985",
    "Film | Original Screenplay in 1984",
    "Film | Original Screenplay in 1985",
    "Film | Original Screenplay in 1986",
    "Film | Original Screenplay in 1987",
    "Film | Original Screenplay in 1988",
    "Film | Original Screenplay in 1989",
    "Film | Original Screenplay in 1990",
    "Film | Original Screenplay in 1991",
    "Film | Original Screenplay in 1992",
    "Film | Original Screenplay in 1993",
    "Film | Original Screenplay in 1994",
    "Film | Original Screenplay in 1995",
    "Film | Original Screenplay in 1996",
    "Film | Original Screenplay in 2001",
    "Film | Original Screenplay in 2016",
    "Film | Original Screenplay in 2017",
    "Film | Original Screenplay in 2018",
    "Film | Original Screenplay in 2019",
    "Film | Original Screenplay in 2020",
    "Film | Outstanding Debut By A British Writer, Director Or Producer in 2020",
    "Film | Outstanding Debut By A British Writer, Director or Producer in 2016",
    "Film | Outstanding Debut By A British Writer, Director or Producer in 2017",
    "Film | Outstanding Debut by a British Writer, Director or Producer in 2018",
    "Film | Outstanding Debut by a British Writer, Director or Producer in 2019",
    "Film | Production Design in 1977",
    "Film | Production Design in 1978",
    "Film | Production Design in 1979",
    "Film | Production Design in 1980",
    "Film | Production Design in 1981",
    "Film | Production Design in 1982",
    "Film | Production Design in 1983",
    "Film | Production Design in 1984",
    "Film | Production Design in 1985",
    "Film | Production Design in 1986",
    "Film | Production Design in 1987",
    "Film | Production Design in 1988",
    "Film | Production Design in 1989",
    "Film | Production Design in 1990",
    "Film | Production Design in 1991",
    "Film | Production Design in 1992",
    "Film | Production Design in 1993",
    "Film | Production Design in 1994",
    "Film | Production Design in 1995",
    "Film | Production Design in 1996",
    "Film | Production Design in 2001",
    "Film | Production Design in 2016",
    "Film | Production Design in 2017",
    "Film | Production Design in 2018",
    "Film | Production Design in 2019",
    "Film | Production Design in 2020",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1960",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1966",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1967",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1968",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1969",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1970",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1971",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1972",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1974",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1975",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1976",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1977",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1979",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1980",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1982",
    "Film | Robert Flaherty Award (Feature Length Film, Documentary In Content) in 1983",
    "Film | Screenplay in 1969",
    "Film | Screenplay in 1970",
    "Film | Screenplay in 1971",
    "Film | Screenplay in 1972",
    "Film | Screenplay in 1973",
    "Film | Screenplay in 1974",
    "Film | Screenplay in 1975",
    "Film | Screenplay in 1976",
    "Film | Screenplay in 1977",
    "Film | Screenplay in 1978",
    "Film | Screenplay in 1979",
    "Film | Screenplay in 1980",
    "Film | Screenplay in 1981",
    "Film | Screenplay in 1982",
    "Film | Screenplay in 1983",
    "Film | Short Factual Film in 1977",
    "Film | Short Factual Film in 1979",
    "Film | Supporting Actor in 1969",
    "Film | Supporting Actor in 1970",
    "Film | Supporting Actor in 1971",
    "Film | Supporting Actor in 1972",
    "Film | Supporting Actor in 1973",
    "Film | Supporting Actor in 1974",
    "Film | Supporting Actor in 1975",
    "Film | Supporting Actor in 1976",
    "Film | Supporting Actor in 1977",
    "Film | Supporting Actor in 1978",
    "Film | Supporting Actor in 1979",
    "Film | Supporting Actor in 1980",
    "Film | Supporting Actor in 1983",
    "Film | Supporting Actor in 1984",
    "Film | Supporting Actor in 1985",
    "Film | Supporting Actor in 2016",
    "Film | Supporting Actor in 2017",
    "Film | Supporting Actor in 2018",
    "Film | Supporting Actor in 2019",
    "Film | Supporting Actor in 2020",
    "Film | Supporting Actress in 1969",
    "Film | Supporting Actress in 1970",
    "Film | Supporting Actress in 1971",
    "Film | Supporting Actress in 1972",
    "Film | Supporting Actress in 1973",
    "Film | Supporting Actress in 1974",
    "Film | Supporting Actress in 1975",
    "Film | Supporting Actress in 1976",
    "Film | Supporting Actress in 1977",
    "Film | Supporting Actress in 1978",
    "Film | Supporting Actress in 1979",
    "Film | Supporting Actress in 1980",
    "Film | Supporting Actress in 1983",
    "Film | Supporting Actress in 1984",
    "Film | Supporting Actress in 1985",
    "Film | Supporting Actress in 2016",
    "Film | Supporting Actress in 2017",
    "Film | Supporting Actress in 2018",
    "Film | Supporting Actress in 2019",
    "Film | Supporting Actress in 2020",
    "Film | Supporting Artist in 1982"
]

# Carica il dataframe
df = pd.read_csv('../dataset/bafta_films.csv')  # Modifica con il percorso corretto del tuo file

# Filtra il dataframe per mantenere solo le righe con i valori nella lista 'categories_to_keep'
df_filtered = df[df['category'].isin(allowed_categories)]

# Salva il dataframe filtrato, se necessario
df_filtered.to_csv('../dataset/NEW_bafta_films_cleaned.csv', index=False)




In [7]:
import csv

def conta_righe_e_scambia_valori(file_csv, valori_categoria):
    contatore = 0
    
    with open(file_csv, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        righe = list(reader)  # Memorizza tutte le righe in una lista per consentire una scansione successiva
        
        for riga in righe:
            if riga['category'] in valori_categoria:
                contatore += 1
                # Scambia i valori delle colonne 'nominee' e 'workers'
                riga['nominee'], riga['workers'] = riga['workers'], riga['nominee']
    
    # Stampa il numero di righe trovate
    print("Numero di righe con categorie corrispondenti:", contatore)
    
    # Riscrive il file CSV con le modifiche apportate
    with open(file_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=reader.fieldnames)
        writer.writeheader()
        writer.writerows(righe)

# Esempio di utilizzo
file_csv = '../dataset/NEW_bafta_films_cleaned.csv'
valori_categoria = [
    "Film | Most Promising Newcomer To Film in 1953",
    "Film | Most Promising Newcomer To Film in 1954",
    "Film | Most Promising Newcomer To Film in 1955",
    "Film | Most Promising Newcomer To Film in 1956",
    "Film | Most Promising Newcomer To Film in 1957",
    "Film | Most Promising Newcomer To Film in 1958",
    "Film | Most Promising Newcomer To Film in 1959",
    "Film | Most Promising Newcomer To Film in 1960",
    "Film | Most Promising Newcomer To Film in 1984",
    "Film | Most Promising Newcomer To Film in 1985",
    "Film | Screenplay in 1969",
    "Film | Screenplay in 1970",
    "Film | Screenplay in 1971",
    "Film | Screenplay in 1972",
    "Film | Screenplay in 1973",
    "Film | Screenplay in 1974",
    "Film | Screenplay in 1975",
    "Film | Screenplay in 1976",
    "Film | Screenplay in 1977",
    "Film | Screenplay in 1978",
    "Film | Screenplay in 1979",
    "Film | Screenplay in 1980",
    "Film | Screenplay in 1981",
    "Film | Screenplay in 1982",
    "Film | Screenplay in 1983",
    "Film | Short Factual Film in 1977",
    "Film | Short Factual Film in 1979",
    "Film | Supporting Actor in 1969",
    "Film | Supporting Actor in 1970",
    "Film | Supporting Actor in 1971",
    "Film | Supporting Actor in 1972",
    "Film | Supporting Actor in 1973",
    "Film | Supporting Actor in 1974",
    "Film | Supporting Actor in 1975",
    "Film | Supporting Actor in 1976",
    "Film | Supporting Actor in 1977",
    "Film | Supporting Actor in 1978",
    "Film | Supporting Actor in 1979",
    "Film | Supporting Actor in 1980",
    "Film | Supporting Actor in 1983",
    "Film | Supporting Actor in 1984",
    "Film | Supporting Actor in 1985",
    "Film | Supporting Actor in 2016",
    "Film | Supporting Actor in 2017",
    "Film | Supporting Actor in 2018",
    "Film | Supporting Actor in 2019",
    "Film | Supporting Actor in 2020",
    "Film | Supporting Actress in 1969",
    "Film | Supporting Actress in 1970",
    "Film | Supporting Actress in 1971",
    "Film | Supporting Actress in 1972",
    "Film | Supporting Actress in 1973",
    "Film | Supporting Actress in 1974",
    "Film | Supporting Actress in 1975",
    "Film | Supporting Actress in 1976",
    "Film | Supporting Actress in 1977",
    "Film | Supporting Actress in 1978",
    "Film | Supporting Actress in 1979",
    "Film | Supporting Actress in 1980",
    "Film | Supporting Actress in 1983",
    "Film | Supporting Actress in 1984",
    "Film | Supporting Actress in 1985",
    "Film | Supporting Actress in 2016",
    "Film | Supporting Actress in 2017",
    "Film | Supporting Actress in 2018",
    "Film | Supporting Actress in 2019",
    "Film | Supporting Actress in 2020",
    "Film | Supporting Artist in 1982"
]

conta_righe_e_scambia_valori(file_csv, valori_categoria)


Numero di righe con categorie corrispondenti: 281


# QUESTO FUNZIONAAAA    

In [14]:
import pandas as pd

# Caricare il dataset movies e aggiungere le colonne per BAFTA
df_movies = pd.read_csv('../dataset/movies_with_gg.csv')
df_movies['BAFTA_act_nom'] = 0
df_movies['BAFTA_act_won'] = 0
df_movies['BAFTA_dir_nom'] = 0
df_movies['BAFTA_dir_won'] = 0
df_movies['BAFTA_writer_nom'] = 0
df_movies['BAFTA_writer_won'] = 0


# Caricare il dataset BAFTA
df_bafta = pd.read_csv('../dataset/NEW_bafta_films_cleaned.csv')

# Funzione per normalizzare e dividere i nomi
def normalize_split_names(names):
    if isinstance(names, str):
        return [name.strip() for name in names.split(',')]
    else:
        return []

# Iterare su ogni riga del dataset BAFTA
for index, row in df_bafta.iterrows():
    workers = normalize_split_names(row['workers'])
    bafta_year = row['year']
    is_winner = row['winner']

    # Controllare corrispondenze nel dataset movies
    for worker in workers:
        # Creare una maschera per gli attori e l'anno
        mask = (df_movies['actors'].apply(lambda x: worker in x if isinstance(x, str) else False)) & (
            (df_movies['year'] >= bafta_year))
        # Aggiornare i conteggi di nomine e premi
        df_movies.loc[mask, 'BAFTA_nominees'] += 1
        if is_winner:
            df_movies.loc[mask, 'BAFTA_awards'] += 1

# Salvare il dataset aggiornato
df_movies.to_csv('movies_with_bafta.csv', index=False)


In [24]:
import pandas as pd

# Caricare il dataset movies e aggiungere le colonne per BAFTA
df_movies = pd.read_csv('../dataset/movies_with_gg.csv')
df_movies['BAFTA_act_nom'] = 0
df_movies['BAFTA_act_won'] = 0
df_movies['BAFTA_dir_nom'] = 0
df_movies['BAFTA_dir_won'] = 0
df_movies['BAFTA_writer_nom'] = 0
df_movies['BAFTA_writer_won'] = 0


# Caricare il dataset BAFTA
df_bafta = pd.read_csv('../dataset/NEW_bafta_films_cleaned.csv')

# Funzione per normalizzare e dividere i nomi
def normalize_split_names(names):
    if isinstance(names, str):
        return [name.strip() for name in names.split(',')]
    else:
        return []

# Termini relativi alla regia
director_terms = ['director', 'directing', 'direction', 'film', 'Robert', 'Production Design','Editing', 'Documentary']
# Termini relativi alla sceneggiatura
writer_terms = ['screenplay', 'writing', 'writer', 'Grierson']
actors_terms = ['actor','actress']

# Iterare su ogni riga del dataset BAFTA
for index, row in df_bafta.iterrows():
    workers = normalize_split_names(row['workers'])
    bafta_year = row['year']
    is_winner = row['winner']
    category = row['category']

    # Controllare corrispondenze nel dataset movies
    for worker in workers:
        # Se la categoria premiata è relativa alla regia
        if any(term in category.lower() for term in director_terms):
            mask = (df_movies['director'].apply(lambda x: worker in x if isinstance(x, str) else False)) & (
                (df_movies['year'] >= bafta_year-1))
            df_movies.loc[mask, 'BAFTA_dir_nom'] += 1
            if is_winner:
                df_movies.loc[mask, 'BAFTA_dir_won'] += 1
        # Se la categoria premiata è relativa alla sceneggiatura
        if any(term in category.lower() for term in writer_terms):
            mask = (df_movies['writer'].apply(lambda x: worker in x if isinstance(x, str) else False)) & (
                (df_movies['year'] >= bafta_year-1))
            df_movies.loc[mask, 'BAFTA_writer_nom'] += 1
            if is_winner:
                df_movies.loc[mask, 'BAFTA_writer_won'] += 1
        # Altrimenti, la categoria è relativa agli attori
        if any(term in category.lower() for term in actors_terms):
            mask = (df_movies['actors'].apply(lambda x: worker in x if isinstance(x, str) else False)) & (
                (df_movies['year'] >= bafta_year-1))
            df_movies.loc[mask, 'BAFTA_act_nom'] += 1
            if is_winner:
                df_movies.loc[mask, 'BAFTA_act_won'] += 1

# Salvare il dataset aggiornato
df_movies.to_csv('../dataset/movies_with_bafta.csv', index=False)
