## Community Detection in Stocks Using the Louvain Algorithm
---

*University of California, Berkeley*

In [1]:
# Import necessary packages
import os

from neo4j import GraphDatabase
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the NASDAQ-100 data file (tab-delimited despite its .csv extension)
nasdaq100 = pd.read_csv(
    "/user/projects/project-3-techChanakya/data/NASDAQ_100_Data_From_2010.csv",
    delimiter="\t",
)

In [3]:
# Analysis window; both bounds are inclusive
start_date, end_date = '2021-05-01', '2021-09-30'

# Keep only the rows whose Date falls inside the window
sub_nasdaq100 = nasdaq100[nasdaq100['Date'].between(start_date, end_date)]

In [4]:
# Neo4j connection parameters.
# Each value can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook; the defaults are
# the values this notebook was originally written with.
uri = os.environ.get("NEO4J_URI", "neo4j://neo4j:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205")

# Neo4j connection (module-level; the functions below open sessions on it)
driver = GraphDatabase.driver(uri=uri, auth=(user, password))

In [5]:
# Function to wipe out database by deleting all nodes and relationships
def my_neo4j_wipe_out_database():
    """
    Wipe out the Neo4j database by deleting all nodes and relationships.

    Uses the module-level ``driver`` and prints a confirmation when done.
    """
    with driver.session() as session:
        # DETACH DELETE removes each node together with every relationship
        # attached to it (incoming or outgoing) in a single statement, so no
        # separate relationship-deletion pass is needed.
        # consume() waits for the statement to finish so that any error is
        # raised here rather than when the session closes.
        session.run("MATCH (n) DETACH DELETE n").consume()
    print("Neo4j database wiped clean.")

# Function to insert stock and trading day data into Neo4j
def insert_stock_data(tx, stock, date, close, volume):
    """
    Insert a stock node and one of its trading day nodes into Neo4j.

    Args:
        tx: Transaction object exposing ``run`` (supplied by
            ``session.execute_write``).
        stock: Ticker stored as the Stock node's ``name``.
        date: Trading date; converted with Cypher ``date()``.
        close: Closing price; converted with ``toFloat``.
        volume: Traded volume; converted with ``toInteger``.
    """
    # The trading-day node is merged through its owning stock and keyed on the
    # date alone. Merging on (date, close, volume) without the stock would let
    # two stocks that happen to share those three values end up attached to
    # the same node, and would create a duplicate day whenever a price is
    # re-loaded with a revised value.
    tx.run("""
        MERGE (s:Stock {name: $stock})
        MERGE (s)-[:TRADING_DAY]->(t:StockTradingDay {date: date($date)})
        SET t.close = toFloat($close), t.volume = toInteger($volume)
    """, stock=stock, date=date, close=close, volume=volume)

# Function to insert correlation relationships between stocks into Neo4j
def insert_correlation_data(tx, stock1, stock2, correlation_value):
    """
    Link two existing Stock nodes with CORRELATION relationships.

    The relationship is written in both directions (stock1 -> stock2 and
    stock2 -> stock1), each carrying the correlation as its ``value``
    property. The stocks are looked up with MATCH, so nothing is written
    when either node does not exist.
    """
    cypher = """
        MATCH (s1:Stock {name: $stock1}), (s2:Stock {name: $stock2})
        MERGE (s1)-[:CORRELATION {value: $correlation_value}]->(s2)
        MERGE (s2)-[:CORRELATION {value: $correlation_value}]->(s1)
    """
    params = {
        "stock1": stock1,
        "stock2": stock2,
        "correlation_value": correlation_value,
    }
    tx.run(cypher, **params)

# Function to calculate stock correlations based on closing prices
def calculate_stock_correlations(sub_nasdaq100):
    """
    Calculate the correlation matrix for the stocks based on their closing prices.

    Args:
        sub_nasdaq100: DataFrame with 'Date', 'Name' and 'Close' columns, one
            row per stock per trading day.

    Returns:
        A tuple ``(correlation_matrix, stock_names)``: a square NumPy array of
        Pearson correlations, and the list of stock names in the same
        row/column order as the matrix.

    Raises:
        ValueError: Raised by ``DataFrame.pivot`` when a (Date, Name) pair
            appears more than once.
    """
    # One row per date, one column per stock
    pivot_data = sub_nasdaq100.pivot(index='Date', columns='Name', values='Close')

    # Pearson correlation is unchanged by per-column standardisation, so no
    # scaling step is needed. DataFrame.corr() computes each pair from the
    # dates both stocks have, so a stock with a missing trading day still gets
    # correlations instead of turning its whole row and column into NaN.
    correlation_matrix = pivot_data.corr().to_numpy()

    return correlation_matrix, pivot_data.columns.tolist()

# Function to insert correlation relationships for all stock pairs
def insert_all_correlations(correlation_matrix, stock_names, threshold=0.8): # Selected 0.8 correlation to only include strong enough correlations
    """
    Write a correlation link to Neo4j for every strongly correlated stock pair.

    Each unordered pair is visited once (the part of the matrix above the
    diagonal). A pair is written when the absolute value of its correlation
    is at least ``threshold``, so strong negative correlations are included
    as well as strong positive ones.
    """
    with driver.session() as session:
        for row, first in enumerate(stock_names):
            for col, second in enumerate(stock_names):
                # Skip the diagonal and the mirrored lower triangle
                if row >= col:
                    continue
                strength = correlation_matrix[row, col]
                # A NaN correlation fails this comparison and is skipped
                if abs(strength) >= threshold:
                    session.execute_write(insert_correlation_data, first, second, strength)

# Function to project the graph and run Louvain modularity algorithm
def run_louvain_modularity():
    """
    Project the graph and run the Louvain algorithm to detect communities.

    The Stock nodes and CORRELATION relationships are projected into a GDS
    graph named 'nasdaq_graph', Louvain is streamed over it, and the
    per-ticker result is printed in full, written to
    'louvain_communities_full.csv' and returned.

    Returns:
        DataFrame with columns 'ticker', 'community' and
        'intermediate_community', ordered by community and then ticker.
    """
    with driver.session() as session:
        # A projection left over from an earlier run makes gds.graph.project
        # fail because the name is already taken. Drop it first; the second
        # argument (failIfMissing = false) makes this a no-op on a first run.
        session.run("CALL gds.graph.drop('nasdaq_graph', false)").consume()

        # consume() so that a projection error is raised here, at its source
        session.run("""
            CALL gds.graph.project(
                'nasdaq_graph', 
                'Stock', 
                'CORRELATION', 
                {relationshipProperties: ['value']}
            )
        """).consume()

        # Run the Louvain algorithm to detect communities
        # NOTE(review): 'value' is loaded into the projection but no
        # relationshipWeightProperty is passed here, so the correlation
        # strength does not appear to weight the result - confirm intended.
        result = session.run("""
            CALL gds.louvain.stream('nasdaq_graph', {includeIntermediateCommunities: true})
            YIELD nodeId, communityId, intermediateCommunityIds
            RETURN gds.util.asNode(nodeId).name AS ticker, 
                   communityId AS community, 
                   intermediateCommunityIds AS intermediate_community
            ORDER BY community, ticker ASC
        """)

        # Read the records while the session is still open
        results = [
            {
                'ticker': record['ticker'],
                'community': record['community'],
                'intermediate_community': record['intermediate_community'],
            }
            for record in result
        ]

    # Naming the columns keeps the frame well-formed even with zero records
    df = pd.DataFrame(
        results, columns=['ticker', 'community', 'intermediate_community']
    )

    # Avoid clipping, show the full data output. option_context restores the
    # previous display settings afterwards instead of changing them globally.
    with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', None,
        'display.width', None,
        'display.max_colwidth', None,
    ):
        print(df)

    # Export Louvain results to a csv
    df.to_csv("louvain_communities_full.csv", index=False)

    print("Louvain communities data saved to 'louvain_communities_full.csv'.")

    return df

# Function to insert stock and trading day data into Neo4j
def insert_stock_and_trading_day_data(sub_nasdaq100):
    """
    Load every row of the price table into Neo4j.

    For each row, the Name, Date, Close and Volume values are passed, in that
    order, to ``insert_stock_data`` through ``session.execute_write``.
    """
    fields = ('Name', 'Date', 'Close', 'Volume')

    with driver.session() as session:
        # One execute_write call per row of the table
        for _, record in sub_nasdaq100.iterrows():
            session.execute_write(insert_stock_data, *(record[f] for f in fields))

# Run the full function
def main(sub_nasdaq100):
    """
    Run the whole pipeline end to end on the given price table.

    Steps, in order: wipe the database, insert the stock and trading day
    data, compute the correlations and insert them, then run Louvain
    community detection.
    """
    # Start from an empty database
    my_neo4j_wipe_out_database()

    # Load stock and trading day nodes
    insert_stock_and_trading_day_data(sub_nasdaq100)

    # Compute (matrix, names) and pass both straight on to the insert step
    insert_all_correlations(*calculate_stock_correlations(sub_nasdaq100))

    # Detect communities
    run_louvain_modularity()

# Close the driver even if the pipeline fails part-way, so its connections
# are not left open.
try:
    main(sub_nasdaq100)
finally:
    driver.close()

Neo4j database wiped clean.
    ticker  community intermediate_community
0     BIIB         14               [14, 14]
1     BKNG         15               [15, 15]
2     CERN         18               [18, 18]
3      CSX         26               [26, 26]
4     CTSH         28               [28, 28]
5     ADBE         29               [29, 29]
6     AVGO         29               [29, 29]
7     CRWD         29               [29, 29]
8     CTAS         29               [29, 29]
9     DLTR         29               [29, 29]
10    DOCU         29               [29, 29]
11    DXCM         29               [29, 29]
12    EBAY         29               [29, 29]
13      FB         29               [29, 29]
14    GOOG         29               [29, 29]
15    ILMN         29               [29, 29]
16    INTU         29               [29, 29]
17    LULU         29               [29, 29]
18    MRVL         29               [29, 29]
19    MSFT         29               [29, 29]
20    NVDA         29      

#### How it Works
The Louvain algorithm uncovered groups of stocks that are connected with one another, placing them into communities. Each of our 100 stocks was placed into one of 30 communities, as noted in the community column. The intermediate community information lists the communities that a ticker passed through in intermediate steps. Most of our stocks went directly to their final community designation, but a few (e.g., TCOM) passed through 2 communities prior to landing on their final one. This occurs because the Louvain method is hierarchical in nature, and this designation shows how the network evolved as communities were aggregated.

#### Interpreting Results
This view could be very useful to both the investor and the financial advisor. Since the data covers 5 months of stock closing prices, these communities show how individual stocks correlate with one another overall. For investors looking to diversify their portfolios, selecting stocks from different communities may provide an optimal balance. On the other hand, examining the behavior of stocks within the same community may help financial advisors note patterns and potentially predict future behavior.