# Notebook 2: Consultar datos desde MySQL y cargarlos en Cassandra.

En este cuaderno se consultarán los datos almacenados anteriormente en la base de datos de MySQL (véase y ejecútese primero el Notebook 1). Después de consultarlos, se cargarán en un keyspace de Cassandra.

In [4]:
import pandas as pd
from sqlalchemy import create_engine, text

# Connection settings for the MySQL database that Notebook 1 filled in.
mysql_user = "user"
mysql_password = "password"
mysql_host = "127.0.0.1"
mysql_port = "3306"
mysql_db = "testdb"
table_name = "mi_tabla"

# Build the SQLAlchemy URL first, then hand it to the engine factory.
connection_url = f"mysql+mysqlconnector://{mysql_user}:{mysql_password}@{mysql_host}:{mysql_port}/{mysql_db}"
engine = create_engine(connection_url)

# Pull the whole table into a DataFrame and preview the first rows.
select_all_query = f"SELECT * FROM {table_name}"
df = pd.read_sql(sql=select_all_query, con=engine)
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [8]:
from cassandra.cluster import Cluster
from cassandra.query import PreparedStatement

# Copy the rows held in `df` (read from MySQL earlier in this notebook) into
# the Cassandra table `testkeyspace.mi_tabla_cassandra`, then print a small
# sample to confirm the load.

# DataFrame columns, listed in the same order as the INSERT placeholders.
# The DataFrame's 'show_id' column feeds the Cassandra 'id' primary key.
source_columns = (
    'show_id', 'type', 'title', 'director', 'cast', 'country',
    'date_added', 'release_year', 'rating', 'duration',
    'listed_in', 'description',
)

cluster = Cluster(['localhost'], port=9042)
try:
    session = cluster.connect()

    session.execute("""
CREATE KEYSPACE IF NOT EXISTS testkeyspace
WITH REPLICATION = { 'class': 'SimpleStrategy', 'replication_factor': 1 }
""")

    session.set_keyspace('testkeyspace')

    session.execute("""
CREATE TABLE IF NOT EXISTS mi_tabla_cassandra (
    id TEXT PRIMARY KEY,
    type TEXT,
    title TEXT,
    director TEXT,
    cast TEXT,
    country TEXT,
    date_added TEXT,
    release_year INT,
    rating TEXT,
    duration TEXT,
    listed_in TEXT,
    description TEXT
)
""")

    # Prepare once, execute many times: the statement is parsed a single time.
    insert_query = session.prepare("""
INSERT INTO mi_tabla_cassandra (
    id, type, title, director, cast, country,
    date_added, release_year, rating, duration,
    listed_in, description
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")

    for _, row in df.iterrows():
        # Missing values may show up as None or as NaN depending on the column
        # dtype; normalise both to None so they are bound as null instead of
        # a float NaN being sent to a TEXT column.
        values = tuple(
            None if pd.isna(row[column]) else row[column]
            for column in source_columns
        )
        session.execute(insert_query, values)

    # Sanity check: show a handful of the rows that were just written.
    rows = session.execute("SELECT id, title, type FROM mi_tabla_cassandra LIMIT 5")
    for r in rows:
        print(r)
finally:
    # IMPORTANT: the cluster must be closed before continuing. Doing it in
    # `finally` guarantees the connection is released even if a statement
    # above raises.
    cluster.shutdown()

Row(id='s419', title='Chhota Bheem Ka Roosi Romanch', type='Movie')
Row(id='s2571', title='Lady Driver', type='Movie')
Row(id='s8478', title='The Real Miyagi', type='Movie')
Row(id='s341', title='Inception', type='Movie')
Row(id='s3520', title='Kabaneri of the Iron Fortress: The Battle of Unato', type='TV Show')
