### Analyze and Visualize Data


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os

# Specify the path to the data folder
data_folder = '..data/'  # Adjust this if your folder structure is different

# Use glob to get all CSV files in the data folder
csv_files = glob.glob(os.path.join(data_folder, '*.csv'))

# Initialize an empty list to store DataFrames
dataframes = []

# Loop through the list of files and read each one into a DataFrame
for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

# Optionally, concatenate all DataFrames into a single DataFrame
combined_data = pd.concat(dataframes, ignore_index=True)

# Now you can work with combined_data
print(combined_data.head())

### Visualize Key Indicators:

In [None]:

plt.figure(figsize=(12, 6))
plt.plot(data['date'], data['SMA'], label='SMA', color='blue')
plt.plot(data['date'], data['EMA'], label='EMA', color='orange')
plt.legend()
plt.title('SMA and EMA Trends')
plt.show()


### PyNance for Financial Metrics

In [None]:
import pynance as pn

def calculate_volatility(df):
    df['volatility'] = df['close'].rolling(window=10).std()
    return df

df = pd.read_csv('task_2/data/indicators_data.csv')
df_with_volatility = calculate_volatility(df)
df_with_volatility.to_csv('task_2/data/final_data.csv', index=False

In [5]:
import pandas as pd
import os

def load_and_clean_data(data_folder):
    all_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]
    df_list = [pd.read_csv(os.path.join(data_folder, file)) for file in all_files]
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df['date'] = pd.to_datetime(combined_df['date'], errors='coerce')
    return combined_df.dropna()

data = load_and_clean_data('week_1/data')
data.to_csv('..data/processed_data.csv', index=False)

SyntaxError: invalid non-printable character U+00A0 (1226969161.py, line 5)

###1. Run Sentiment Analysis: Execute scripts/sentiment_analysis.py.

In [3]:
python scripts/sentiment_analysis.py

SyntaxError: invalid syntax (1945954782.py, line 1)

###2. Calculate Returns: Execute scripts/calculate_returns.py.

In [4]:
python scripts/calculate_returns.py

SyntaxError: invalid syntax (1207841381.py, line 1)

###3. Analyze Correlation: Execute scripts/correlation_analysis.py.

In [None]:

python scripts/correlation_analysis.py


###4. Visualize Results: Execute scripts/visualization.py.

In [None]:
python scripts/visualization.py

In [None]:
import os
import pandas as pd
import talib
from textblob import TextBlob

def load_and_clean_data(data_folder):
    """
    Load all CSV files from a folder, combine them, and clean the data.
    """
    all_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]
    if not all_files:
        raise FileNotFoundError(f"No CSV files found in {data_folder}")
    
    # Combine all data files
    df_list = [pd.read_csv(os.path.join(data_folder, file)) for file in all_files]
    combined_df = pd.concat(df_list, ignore_index=True)
    
    # Ensure 'date' column is in datetime format and drop nulls
    combined_df['date'] = pd.to_datetime(combined_df['date'], errors='coerce')
    combined_df = combined_df.dropna(subset=['date', 'close'])  # Remove rows with invalid dates or prices
    
    print("Data loaded and cleaned successfully.")
    return combined_df

def add_sentiment_analysis(data):
    """
    Add sentiment scores to the dataset if 'headline' column exists.
    """
    if 'headline' in data.columns:
        data['sentiment'] = data['headline'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)
        print("Sentiment analysis added.")
    else:
        print("No 'headline' column found. Skipping sentiment analysis.")
    return data

def calculate_returns(data):
    """
    Calculate daily percentage returns for the 'close' column.
    """
    data['returns'] = data['close'].pct_change()
    print("Daily returns calculated.")
    return data

def add_technical_indicators(data):
    """
    Add common technical indicators using TA-Lib.
    """
    data['SMA'] = talib.SMA(data['close'], timeperiod=14)
    data['EMA'] = talib.EMA(data['close'], timeperiod=14)
    data['RSI'] = talib.RSI(data['close'], timeperiod=14)
    data['MACD'], data['MACD_signal'], _ = talib.MACD(data['close'])
    print("Technical indicators calculated.")
    return data

def analyze_correlation(data, output_path):
    """
    Analyze correlations between numeric columns and save the results.
    """
    correlation_matrix = data.corr()
    correlation_matrix.to_csv(output_path)
    print(f"Correlation analysis saved to {output_path}")

def save_data(data, output_path):
    """
    Save the final dataset to a CSV file.
    """
    data.to_csv(output_path, index=False)
    print(f"Final data saved to {output_path}")

def main():
    # Paths
    data_folder = "week-1/data"
    correlation_output_path = "week-1/correlation_matrix.csv"
    final_output_path = "week-1/final_results.csv"

    # Load and clean data
    data = load_and_clean_data(data_folder)
    
    # Perform sentiment analysis
    data = add_sentiment_analysis(data)
    
    # Calculate returns
    data = calculate_returns(data)
    
    # Add technical indicators
    data = add_technical_indicators(data)
    
    # Analyze correlations
    analyze_correlation(data, correlation_output_path)
    
    # Save the final dataset
    save_data(data, final_output_path)

if __name__ == "__main__":
    main()
