In [4]:
# %pip install pandas numpy  (`ast` is part of the Python standard library; it is not pip-installable)
# %pip install ace_tools
# %pip install pymongo


In [5]:
import ast
import os

import pandas as pd
from pymongo import MongoClient, UpdateOne

In [None]:
# 1. Load the data
df_films = pd.read_csv("dataset.csv").assign(
    # Ratings that cannot be parsed as numbers become NaN instead of raising.
    Average_rating=lambda films: pd.to_numeric(films['Average_rating'], errors='coerce')
)
df_users = pd.read_csv("top_actors.csv")

def safe_literal_eval(val):
    """Parse a stringified Python literal without ever raising.

    Args:
        val: A cell value. Typically a string such as "['A', 'B']", a missing
            value (None / NaN), or an already-parsed list, tuple or set.

    Returns:
        The value produced by ``ast.literal_eval`` when parsing succeeds;
        a list copy of ``val`` when it is already a list/tuple/set;
        an empty list when ``val`` is missing or cannot be parsed.
    """
    # Already-parsed containers are passed through. Calling pd.isna() on them
    # returns an array, and testing that array in an `if` raises ValueError.
    if isinstance(val, (list, tuple, set)):
        return list(val)
    if pd.isna(val):
        return []
    try:
        return ast.literal_eval(val)
    except Exception:
        # Best effort: malformed or non-literal content is treated as "no data".
        return []

# Parse the stringified cast lists, then produce one row per (film, actor).
# Films whose cast list is empty explode to a NaN actor and are dropped.
df_films['Cast'] = df_films['Cast'].apply(safe_literal_eval)
df_long = (
    df_films
    .explode('Cast')
    .rename(columns={'Cast': 'Actor'})
    .dropna(subset=['Actor'])
)

# Films per actor: count of non-null Film_title values in each actor group.
actor_stats = (
    df_long.groupby('Actor')['Film_title']
           .count()
           .rename('films_played')
           .reset_index()
)

# An actor is flagged popular when they played in at least 14 films
# or when their name appears in df_users['Name'].
known_names = set(df_users['Name'].dropna())
actor_stats['popularity'] = actor_stats['Actor'].isin(known_names) | actor_stats['films_played'].ge(14)



In [10]:
# Report how many distinct actors were aggregated (one row per actor).
print(f"Le nombre de lignes dans le dataframe actor_stats est: {actor_stats.shape[0]}")


Le nombre de lignes dans le dataframe actor_stats est: 167172


In [11]:
# Keep only the actors flagged as popular (to avoid processing too much data)
actor_stats_popularity = actor_stats.loc[actor_stats['popularity']]
actor_stats_popularity


Unnamed: 0,Actor,films_played,popularity
192,Aaron Eckhart,9,True
346,Aaron Taylor-Johnson,15,True
382,Aasif Mandvi,17,True
603,Abraham Benrubi,17,True
745,Adam Brody,16,True
...,...,...,...
166128,Zach Grenier,14,True
166502,Zeljko Ivanek,17,True
166758,Zoe Kazan,23,True
166783,Zoe Saldaña,29,True


In [12]:
# 2. Connect to MongoDB Atlas
# The connection string embeds the database credentials, so it is read from the
# MONGO_URI environment variable instead of being hardcoded in the notebook.
# Expected format:
# "mongodb+srv://<username>:<password>@<cluster>.mongodb.net/?retryWrites=true&w=majority"
# NOTE(review): a password was previously committed in this cell; treat it as
# leaked and rotate it.
MONGO_URI = os.environ.get("MONGO_URI")
if not MONGO_URI:
    raise RuntimeError(
        "MONGO_URI is not set; define it with the MongoDB Atlas connection string before running this cell."
    )
client = MongoClient(MONGO_URI)
db = client['cinema']
collection = db['actors']

In [13]:
# 3. Build the update/insert operations: one upsert per popular actor.
# Iterating over the three columns directly avoids building a Series per row,
# which is what iterrows() does.
operations = [
    UpdateOne(
        {"name": actor_name},  # lookup filter: match the document by actor name
        {"$set": {
            # Convert pandas/NumPy scalars to built-in int/bool for the document.
            "films_played": int(films_played),
            "popularity": bool(is_popular),
        }},
        upsert=True,  # insert the document when no match exists
    )
    for actor_name, films_played, is_popular in zip(
        actor_stats_popularity["Actor"],
        actor_stats_popularity["films_played"],
        actor_stats_popularity["popularity"],
    )
]

# 4. Run the bulk write. The client is closed in `finally` so the connection is
# released even when bulk_write raises.
try:
    if operations:
        result = collection.bulk_write(operations)
        print(f"{result.modified_count} documents mis à jour, {result.upserted_count} insérés.")
finally:
    client.close()


0 documents mis à jour, 1795 insérés.
