In [6]:
!pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import os

def run_analysis():
    print("Starting analysis...")

    APPS = ['zerodha_kite', 'groww', 'coinbase', 'binance']
    FILENAMES = {app: f"{app}_reviews_last5y_preds_filtered_no_ui.csv" for app in APPS}
    MARKET_TICKERS = ['^NSEI', '^INDIAVIX', 'BTC-USD']
    START_DATE = '2020-01-01'
    END_DATE = '2024-12-31'

    print("\n--- Loading and Preprocessing Data ---")
    dataframes = {}
    for app in APPS:
        filename = FILENAMES[app]
        if os.path.exists(filename):
            print(f"Loading {filename}...")
            try:
                df = pd.read_csv(filename, on_bad_lines='skip', dtype={'appVersion': 'str'})
                
                df['date'] = pd.to_datetime(df['date'])
                
                df = df[(df['date'] >= START_DATE) & (df['date'] <= END_DATE)].copy()
                
                df['is_stressed'] = (df['sent_label_raw'] == 'negative').astype(int)
                
                dataframes[app] = df
                print(f"Loaded {len(df)} reviews for {app} within date range.")
            except Exception as e:
                print(f"Error loading {filename}: {e}")
        else:
            print(f"--- WARNING: File not found: {filename} --- (Skipping)")
    
    if not dataframes:
        print("No dataframes were loaded. Please check your CSV filenames.")
        return

    print("\nAll data loaded and preprocessed.")

    print("\n\n--- Section 4.2: Descriptive Statistics ---")
    stats_summary = []
    for app, df in dataframes.items():
        total_n = len(df)
        sentiment_counts = df['sent_label_raw'].value_counts()
        sentiment_pct = df['sent_label_raw'].value_counts(normalize=True) * 100
        
        stats = {
            'App': app.capitalize(),
            'Total Reviews (N)': total_n,
            'Positive (%)': sentiment_pct.get('positive', 0),
            'Neutral (%)': sentiment_pct.get('neutral', 0),
            'Negative (%)': sentiment_pct.get('negative', 0),
            'Positive (n)': sentiment_counts.get('positive', 0),
            'Neutral (n)': sentiment_counts.get('neutral', 0),
            'Negative (n)': sentiment_counts.get('negative', 0),
        }
        stats_summary.append(stats)

    stats_df = pd.DataFrame(stats_summary)
    print("\n--- Markdown Table for Paper ---")
    print(stats_df.to_markdown(index=False))
    print("----------------------------------")


    print("\n--- Aggregating Data by Day ---")
    aggregated_dfs = {}
    for app, df in dataframes.items():
        df_indexed = df.set_index('date')
        daily_agg = df_indexed.resample('D').agg(
            stress_index=('is_stressed', 'mean'),
            review_volume=('is_stressed', 'count')
        )
        daily_agg['stress_index'] = daily_agg['stress_index'].fillna(0)
        daily_agg['stress_index_7d_ma'] = daily_agg['stress_index'].rolling(window=7, min_periods=1).mean()
        aggregated_dfs[app] = daily_agg
    print("Daily stress index and rolling average calculated.")

    print("\n--- Downloading Market Data ---")
    try:
        market_data = yf.download(MARKET_TICKERS, start=START_DATE, end=END_DATE)
        print("Market data download complete.")
    except Exception as e:
        print(f"Error downloading market data: {e}")
        return

    print("\n--- Generating Plots (this may take a moment) ---")
    
    sns.set(style="whitegrid", font_scale=1.1)
    
    try:
        fig, ax1 = plt.subplots(figsize=(15, 8))
        
        ax1.plot(aggregated_dfs['zerodha_kite'].index, aggregated_dfs['zerodha_kite']['stress_index_7d_ma'], label='Zerodha Stress Index (7d MA)', color='blue', alpha=0.8)
        ax1.plot(aggregated_dfs['groww'].index, aggregated_dfs['groww']['stress_index_7d_ma'], label='Groww Stress Index (7d MA)', color='green', alpha=0.8)
        ax1.set_ylabel('Stress Index (7-day MA of % Negative Reviews)', color='blue')
        ax1.tick_params(axis='y', labelcolor='blue')
        ax1.set_xlabel('Date')
        ax1.set_ylim(0, max(aggregated_dfs['zerodha_kite']['stress_index_7d_ma'].max(), aggregated_dfs['groww']['stress_index_7d_ma'].max()) * 1.2)
        
        ax2 = ax1.twinx()
        ax2.plot(market_data.index, market_data['Close']['^INDIAVIX'], label='India VIX (Volatility)', color='red', linestyle='--', alpha=0.7)
        ax2.set_ylabel('India VIX', color='red')
        ax2.tick_params(axis='y', labelcolor='red')
        ax2.set_ylim(0, market_data['Close']['^INDIAVIX'].max() * 1.2)
        
        fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
        plt.title('Figure 1: Indian Broker App Stress vs. Market Volatility (VIX)')
        plt.savefig('figure_1_india_stress_vs_vix.png', dpi=300, bbox_inches='tight')
        plt.close(fig)
        print("Saved figure_1_india_stress_vs_vix.png")
    except Exception as e:
        print(f"Error generating Figure 1: {e}")

    try:
        fig, ax1 = plt.subplots(figsize=(15, 8))
        
        ax1.plot(aggregated_dfs['coinbase'].index, aggregated_dfs['coinbase']['stress_index_7d_ma'], label='Coinbase Stress Index (7d MA)', color='purple', alpha=0.8)
        ax1.plot(aggregated_dfs['binance'].index, aggregated_dfs['binance']['stress_index_7d_ma'], label='Binance Stress Index (7d MA)', color='orange', alpha=0.8)
        ax1.set_ylabel('Stress Index (7-day MA of % Negative Reviews)', color='purple')
        ax1.tick_params(axis='y', labelcolor='purple')
        ax1.set_xlabel('Date')
        ax1.set_ylim(0, max(aggregated_dfs['coinbase']['stress_index_7d_ma'].max(), aggregated_dfs['binance']['stress_index_7d_ma'].max()) * 1.2)
        
        ax2 = ax1.twinx()
        ax2.plot(market_data.index, market_data['Close']['BTC-USD'], label='Bitcoin Price (USD)', color='black', linestyle='--', alpha=0.7)
        ax2.set_ylabel('Bitcoin Price (USD)', color='black')
        ax2.tick_params(axis='y', labelcolor='black')
        ax2.set_ylim(0, market_data['Close']['BTC-USD'].max() * 1.2)
        
        fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
        plt.title('Figure 2: Crypto App Stress vs. Bitcoin Price')
        plt.savefig('figure_2_crypto_stress_vs_btc.png', dpi=300, bbox_inches='tight')
        plt.close(fig)
        print("Saved figure_2_crypto_stress_vs_btc.png")
    except Exception as e:
        print(f"Error generating Figure 2: {e}")

    print("\n\n--- Section 4.4: Correlation Analysis ---")
    
    try:
        combined_df = pd.DataFrame(index=market_data.index)
        
        combined_df['NIFTY_50'] = market_data['Close']['^NSEI']
        combined_df['INDIA_VIX'] = market_data['Close']['^INDIAVIX']
        combined_df['BTC_USD'] = market_data['Close']['BTC-USD']
        
        for app, agg_df in aggregated_dfs.items():
            combined_df[f'{app}_stress_7d_ma'] = agg_df['stress_index_7d_ma']
            
        combined_df = combined_df.dropna(how='all')
        
        corr_matrix = combined_df.corr(method='pearson')
        
        print("\nCorrelation Matrix (Data for Table 1):\n")
        print(corr_matrix)
        print("\n--- Markdown Table for Paper ---")
        print(corr_matrix.to_markdown(floatfmt=".2f"))
        print("----------------------------------")

        plt.figure(figsize=(12, 8))
        sns.heatmap(
            corr_matrix, 
            annot=True, 
            fmt=".2f", 
            cmap="coolwarm", 
            linewidths=.5,
            center=0
        )
        plt.title('Figure 3: Correlation Matrix Heatmap')
        plt.savefig('figure_3_correlation_heatmap.png', dpi=300, bbox_inches='tight')
        plt.close()
        print("\nSaved figure_3_correlation_heatmap.png")
    except Exception as e:
        print(f"Error generating Correlation Matrix: {e}")


if __name__ == "__main__":
    run_analysis()

Starting analysis...

--- Loading and Preprocessing Data ---
Loading zerodha_kite_reviews_last5y_preds_filtered.csv...
Loaded 35693 reviews for zerodha_kite within date range.
Loading groww_reviews_last5y_preds_filtered.csv...
Loaded 128190 reviews for groww within date range.
Loading coinbase_reviews_last5y_preds_filtered.csv...
Loaded 59622 reviews for coinbase within date range.
Loading binance_reviews_last5y_preds_filtered.csv...
Loaded 88526 reviews for binance within date range.

All data loaded and preprocessed.


--- Section 4.2: Descriptive Statistics ---

--- Markdown Table for Paper ---
| App          |   Total Reviews (N) |   Positive (%) |   Neutral (%) |   Negative (%) |   Positive (n) |   Neutral (n) |   Negative (n) |
|:-------------|--------------------:|---------------:|--------------:|---------------:|---------------:|--------------:|---------------:|
| Zerodha_kite |               35693 |        26.2909 |       47.1913 |        26.5178 |           9384 |         168

  market_data = yf.download(MARKET_TICKERS, start=START_DATE, end=END_DATE)
[*********************100%***********************]  3 of 3 completed

Daily stress index and rolling average calculated.

--- Downloading Market Data ---
Market data download complete.

--- Generating Plots (this may take a moment) ---





Saved figure_1_india_stress_vs_vix.png
Saved figure_2_crypto_stress_vs_btc.png


--- Section 4.4: Correlation Analysis ---

Correlation Matrix (Data for Table 1):

                           NIFTY_50  INDIA_VIX   BTC_USD  \
NIFTY_50                   1.000000  -0.624749  0.752380   
INDIA_VIX                 -0.624749   1.000000 -0.344326   
BTC_USD                    0.752380  -0.344326  1.000000   
zerodha_kite_stress_7d_ma  0.174442  -0.100281  0.104799   
groww_stress_7d_ma         0.350814  -0.257200 -0.070652   
coinbase_stress_7d_ma     -0.308238  -0.048914 -0.513891   
binance_stress_7d_ma      -0.689582   0.387552 -0.325454   

                           zerodha_kite_stress_7d_ma  groww_stress_7d_ma  \
NIFTY_50                                    0.174442            0.350814   
INDIA_VIX                                  -0.100281           -0.257200   
BTC_USD                                     0.104799           -0.070652   
zerodha_kite_stress_7d_ma                   1.00000