In [58]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
from datetime import timedelta

In [59]:
def clean_data(csv_file, cleaned_path='cleaned_concurrent.csv',
               monthly_path='concurrent_simple.csv'):
    """Clean the raw concurrent-player export and derive monthly averages.

    Parameters
    ----------
    csv_file : str or path-like
        CSV containing a ``DateTime`` column plus the ``Users``,
        ``Average Users`` and ``In-Game`` columns.
    cleaned_path : str, path-like or None, default 'cleaned_concurrent.csv'
        Destination for the cleaned full-resolution frame. Pass ``None`` to
        skip writing it.
    monthly_path : str, path-like or None, default 'concurrent_simple.csv'
        Destination for the monthly averages. Pass ``None`` to skip writing.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The cleaned frame indexed by ``DateTime`` and the month-end averages,
        both with the count columns stored as nullable ``Int64``.

    Raises
    ------
    KeyError
        If ``DateTime`` or one of the three count columns is missing.
    """
    count_columns = ['Users', 'Average Users', 'In-Game']

    raw = pd.read_csv(csv_file)
    raw['DateTime'] = pd.to_datetime(raw['DateTime'])
    # set_index returns a new frame, so nothing is mutated in place.
    df = raw.set_index('DateTime')

    for col in count_columns:
        # Unparseable entries become NaN, are filled by linear interpolation
        # in row order, then rounded back to whole player counts.
        numeric = pd.to_numeric(df[col], errors='coerce')
        df[col] = numeric.interpolate(method='linear').round().astype('Int64')

    if cleaned_path is not None:
        df.to_csv(cleaned_path)

    # The MonthEnd offset object bins exactly like the 'ME' alias but is also
    # accepted by pandas versions that predate that alias.
    monthly_avg = (
        df.resample(pd.offsets.MonthEnd())
        .mean()
        .round()
        .astype('Int64')
    )
    if monthly_path is not None:
        monthly_avg.to_csv(monthly_path)

    return df, monthly_avg

# Clean the raw export. Keeps both the full-resolution frame and the monthly
# averages in memory; clean_data also writes each of them to CSV.
concurrent_players, concurrent_simple = clean_data('concurrent_players_4_22_25.csv')

In [74]:
# Load the monthly averages written by clean_data. DateTime is parsed into
# datetimes but is still an ordinary column at this point.
df = pd.read_csv('concurrent_simple.csv', parse_dates=['DateTime'])

# Create an interactive line plot
def create_plot(df):
    """Show Users, Average Users and In-Game as an interactive line chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the 'Users', 'Average Users' and 'In-Game' columns. The
        timestamps may live in a 'DateTime' column or already be the index.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``; nothing is returned.
    """
    # Work on a locally indexed view instead of calling set_index(inplace=True)
    # on the argument: mutating the caller's frame made a second call fail
    # with KeyError because 'DateTime' was no longer a column.
    plot_df = df.set_index('DateTime') if 'DateTime' in df.columns else df

    series_colours = (
        ('Users', 'skyblue'),
        ('Average Users', 'gold'),
        ('In-Game', 'limegreen'),
    )

    fig = go.Figure()
    for column, colour in series_colours:
        fig.add_trace(go.Scatter(
            x=plot_df.index,
            y=plot_df[column],
            mode='lines',
            name=column,
            line=dict(color=colour),
        ))

    # Update layout for better visuals
    fig.update_layout(
        title='Concurrent Users Over Time',
        xaxis_title='Date',
        yaxis_title='Users',
        legend_title='Legend',
        template='plotly_white',
        height=500,
        width=1600,
        xaxis=dict(
            tickformat='%Y',
            # dtick is only honoured in 'linear' tick mode; with 'auto' plotly
            # picks tick positions from nticks and ignores dtick.
            tickmode='linear',
            dtick='M12',  # One tick per year
            tickangle=45,
        )
    )

    # Show the plot
    fig.show()

# Render the monthly-average line chart for the frame loaded above
create_plot(df)


In [70]:
# Monthly averages (the file clean_data writes), indexed by timestamp
df = (
    pd.read_csv('concurrent_simple.csv', parse_dates=['DateTime'])
    .set_index('DateTime')
)

# Module-level figure that plot_regression adds its traces to
fig = go.Figure()

# Function to handle regression + plotting + future projection
def plot_regression(column_name, color, dash_style, forecast_days=30,
                    data=None, figure=None):
    """Fit a linear trend to one column and add data, trend and forecast traces.

    Parameters
    ----------
    column_name : str
        Column to model and plot.
    color : str
        Line colour shared by the data, trend and forecast traces.
    dash_style : str
        Plotly dash style for the historical trend line. The forecast line
        always uses 'dot'.
    forecast_days : int, default 30
        Number of daily steps to project past the latest timestamp. A value
        of zero or less skips the forecast trace.
    data : pandas.DataFrame, optional
        Frame indexed by timestamps. Defaults to the module-level ``df``.
    figure : plotly.graph_objects.Figure, optional
        Figure to draw on. Defaults to the module-level ``fig``.

    Returns
    -------
    None
        Traces and layout are applied to the figure in place. The layout is
        re-applied on every call, so the x-axis range reflects the
        ``forecast_days`` of the most recent call.
    """
    import warnings

    frame = df if data is None else data
    target = fig if figure is None else figure

    if column_name not in frame.columns:
        # Previously a silent no-op; keep it non-fatal but make it visible.
        warnings.warn(
            f"plot_regression: column {column_name!r} not found; nothing plotted"
        )
        return

    observed = frame[column_name].dropna()
    if observed.empty:
        # A regression cannot be fitted without at least one observation.
        warnings.warn(
            f"plot_regression: column {column_name!r} has no data; nothing plotted"
        )
        return

    def to_ordinals(index):
        # Dates become proleptic Gregorian day numbers, shaped (n, 1) for sklearn.
        return index.map(pd.Timestamp.toordinal).values.reshape(-1, 1)

    # Fit model on the non-missing observations only
    model = LinearRegression()
    model.fit(to_ordinals(observed.index), observed.values.reshape(-1, 1))

    # Historical prediction over every timestamp in the frame
    trend = model.predict(to_ordinals(frame.index)).flatten()

    # Use the latest timestamp for both the forecast start and the axis range
    # so the two stay consistent even if the index is not sorted.
    last_date = frame.index.max()

    # Plot original data
    target.add_trace(go.Scatter(
        x=frame.index, y=frame[column_name],
        mode='lines', name=column_name,
        line=dict(color=color)
    ))

    # Plot trend line
    target.add_trace(go.Scatter(
        x=frame.index, y=trend,
        mode='lines', name=f'{column_name} Trend',
        line=dict(color=color, dash=dash_style)
    ))

    # Plot future projection
    if forecast_days > 0:
        future_dates = pd.date_range(
            start=last_date + pd.Timedelta(days=1),
            periods=forecast_days,
            freq='D',
        )
        future_preds = model.predict(to_ordinals(future_dates)).flatten()
        target.add_trace(go.Scatter(
            x=future_dates, y=future_preds,
            mode='lines', name=f'{column_name} Forecast',
            line=dict(color=color, dash='dot')
        ))

    # Layout
    target.update_layout(
        title='Linear Regression Trends with Forecast',
        xaxis_title='Date',
        yaxis_title='Users',
        template='plotly_white',
        height=600,
        width=1000,
        legend_title='Legend',
        xaxis=dict(
            range=['2015-01-01', last_date + pd.Timedelta(days=forecast_days)],
            tickformat='%Y',
            dtick='M12',  # Show one tick per year
            tickangle=0
        )
    )


In [71]:
# Empty the trace list first so re-running this cell does not stack a second
# copy of every line on the shared figure.
fig.data = []

# (column, line colour, trend-line dash) for each series to model.
# NOTE(review): 'dot' for Average Users is the same dash plot_regression uses
# for every forecast line, so its trend and forecast look alike; confirm
# that is intended.
series_styles = [
    ('Users', 'skyblue', 'dash'),
    ('Average Users', 'gold', 'dot'),
    ('In-Game', 'limegreen', 'dashdot'),
]
for column, colour, dash in series_styles:
    plot_regression(column, colour, dash, forecast_days=364)

fig.show()
