In [4]:
!pip install nest_asyncio



In [8]:
import asyncio
import os
from getpass import getpass

import ipywidgets as widgets
import mysql.connector
import nest_asyncio
import pandas as pd
from IPython.display import display, clear_output
from pyspark.sql import SparkSession
from pyspark.sql.functions import lag, col
from pyspark.sql.window import Window

# Apply nest_asyncio to avoid conflicts in Jupyter
nest_asyncio.apply()

# MySQL connection parameters.
# The password is no longer stored in the notebook: it is read from the
# STARMETER_DB_PASSWORD environment variable, and if that is unset or empty
# the user is prompted for it. The other settings keep their previous values
# as defaults but can be overridden through the environment as well.
db_config = {
    "host": os.environ.get("STARMETER_DB_HOST", "localhost"),
    "user": os.environ.get("STARMETER_DB_USER", "timlinkous"),
    "password": os.environ.get("STARMETER_DB_PASSWORD") or getpass("MySQL password: "),
    "database": os.environ.get("STARMETER_DB_NAME", "starmeter"),
}

# Spark session setup. "spark.jars.packages" asks Spark to resolve the MySQL
# JDBC driver from its Maven coordinates; getOrCreate() reuses an existing
# session if one is already active in this kernel.
spark = (
    SparkSession.builder
    .appName("MySQL-Spark-Connection")
    .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.29")
    .getOrCreate()
)

# Function to query latest data from MySQL using Spark
def get_event_log_data():
    """Read the ten newest ``event_log`` rows from MySQL through Spark's JDBC source.

    The connection settings come from the module-level ``db_config`` dict and
    the read goes through the module-level ``spark`` session.

    Returns:
        A Spark DataFrame holding the result of the
        ``ORDER BY event_id DESC LIMIT 10`` subquery. If anything raises while
        setting up the read, the error is printed and an empty DataFrame with
        the columns ``event_id``, ``event_date``, ``celebrity``,
        ``event_description`` and ``current_fan_count`` is returned instead.
    """
    try:
        jdbc_url = f"jdbc:mysql://{db_config['host']}/{db_config['database']}"
        # JDBC "dbtable" accepts a parenthesised subquery with an alias.
        latest_events = "(SELECT * FROM event_log ORDER BY event_id DESC LIMIT 10) as event_log"
        return (
            spark.read.format("jdbc")
            .option("url", jdbc_url)
            .option("driver", "com.mysql.cj.jdbc.Driver")
            .option("dbtable", latest_events)
            .option("user", db_config['user'])
            .option("password", db_config['password'])
            .load()
        )
    except Exception as e:
        print(f"Error fetching data: {e}")
        # A bare list of column names makes Spark infer the types from the
        # data, which fails for an empty dataset. Give the types explicitly so
        # the fallback DataFrame can actually be built.
        # NOTE(review): the column types below are assumed, not read from the
        # database -- confirm them against the real event_log table.
        empty_schema = (
            "event_id: bigint, event_date: string, celebrity: string, "
            "event_description: string, current_fan_count: bigint"
        )
        return spark.createDataFrame([], schema=empty_schema)

# Function to calculate gain/loss
def calculate_gain_loss(df):
    """Add per-celebrity fan-count deltas to an event-log DataFrame.

    Args:
        df: Spark DataFrame with at least the columns ``celebrity``,
            ``event_id`` and ``current_fan_count``.

    Returns:
        A new DataFrame with two extra columns: ``previous_fan_count`` (the
        ``current_fan_count`` of the preceding row for the same celebrity,
        ordered by ``event_id``) and ``gain_loss`` (current minus previous).
        Rows with no preceding row for their celebrity get null in both.
    """
    per_celebrity = Window.partitionBy("celebrity").orderBy("event_id")
    prior_count = lag("current_fan_count").over(per_celebrity)
    with_previous = df.withColumn("previous_fan_count", prior_count)
    return with_previous.withColumn(
        "gain_loss",
        col("current_fan_count") - col("previous_fan_count"),
    )

# Shared notebook state used by the ticker coroutine and the button callbacks
output = widgets.Output()  # output area that the ticker and the callbacks write into
ticker_running = False     # polled by update_ticker; set by start_ticker / stop_ticker

# Create an async function to update the display in real-time
async def update_ticker():
    """Refresh the ticker display every two seconds while ``ticker_running`` is true.

    Each pass fetches the latest events, adds the gain/loss columns and shows
    the resulting pandas frame inside the ``output`` widget. The Spark work is
    blocking, so it runs in the event loop's default executor; that keeps the
    loop free to process the Stop button while a fetch is in flight.

    Any exception raised during a pass is shown in ``output`` and the loop
    carries on with the next pass. The coroutine returns once
    ``ticker_running`` is false.
    """
    loop = asyncio.get_running_loop()
    while ticker_running:
        try:
            event_log_pandas = await loop.run_in_executor(None, _fetch_ticker_frame)

            # Stop may have been clicked while the fetch was running; do not
            # overwrite whatever the stop handler put into the output area.
            if not ticker_running:
                break

            with output:
                clear_output(wait=True)
                if not event_log_pandas.empty:
                    display(event_log_pandas)
                else:
                    print("No data fetched. Check database connection.")
        except Exception as e:
            if not ticker_running:
                break
            with output:
                clear_output(wait=True)
                print(f"Error updating ticker: {e}")
        await asyncio.sleep(2)  # Update every 2 seconds


def _fetch_ticker_frame():
    """Blocking helper: fetch the latest events with gain/loss as a pandas frame (max 10 rows)."""
    event_log_with_gain_loss = calculate_gain_loss(get_event_log_data())
    return event_log_with_gain_loss.toPandas().head(10)

# Function to start the ticker
def start_ticker(button):
    """Button callback: make sure exactly one ticker task is running.

    Args:
        button: the widget that fired the click (unused).

    The created task is kept in the module-level ``ticker_task`` for two
    reasons. The event loop only holds weak references to tasks, so an
    unreferenced task may be garbage-collected mid-run. And a previous task
    that is still sleeping after a Stop click can be reused rather than
    racing against a second, newly created ticker.
    """
    global ticker_running, ticker_task
    ticker_running = True

    # ticker_task does not exist until the first click, hence the lookup.
    existing_task = globals().get("ticker_task")
    if existing_task is None or existing_task.done():
        ticker_task = asyncio.create_task(update_ticker())

# Function to stop the ticker
def stop_ticker(button):
    """Button callback: signal the ticker loop to finish and report it.

    Args:
        button: the widget that fired the click (unused).
    """
    global ticker_running

    # update_ticker re-reads this flag on each pass of its loop.
    ticker_running = False

    # Replace whatever the ticker last displayed with a status line.
    with output:
        clear_output(wait=True)
        print("Ticker stopped.")

# Build each control and wire it to its callback straight away
start_button = widgets.Button(button_style='success', description="Start Ticker")
start_button.on_click(start_ticker)

stop_button = widgets.Button(button_style='danger', description="Stop Ticker")
stop_button.on_click(stop_ticker)

# Buttons side by side, with the ticker output area underneath
button_box = widgets.HBox(children=[start_button, stop_button])
display(widgets.VBox(children=[button_box, output]))

# Initial data fetch to verify everything is working
with output:
    initial_data = get_event_log_data()
    # Materialise the result once. Calling count() and then toPandas() would
    # trigger two separate Spark actions, each re-running the JDBC query.
    initial_pandas = initial_data.toPandas()
    if not initial_pandas.empty:
        display(initial_pandas.head(10))
    else:
        print("No initial data fetched. Please check your database connection.")

ConnectionRefusedError: [Errno 61] Connection refused

In [None]:
mv /path/to/your/notebook.ipynb /path/to/target/directory/