<a href="https://colab.research.google.com/github/Sharansai8/file-sharing-repo/blob/main/trader_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import numpy as np
import pandas as pd

# Load the datasets.
# Both CSVs are required by everything below, so a missing file must stop the cell here
# rather than surface later as a confusing NameError on trader_df / sentiment_df.
try:
    trader_df = pd.read_csv('historical_data.csv')
    sentiment_df = pd.read_csv('fear_greed_index.csv')
except FileNotFoundError as e:
    print(e)
    print("Please make sure the CSV files are uploaded to your Colab environment.")
    # Re-raise so execution halts with the real cause after showing the hint.
    raise
else:
    # Only reached when both reads succeeded.
    print("Trader data loaded successfully.")
    print("Sentiment data loaded successfully.")

# Take a first look at the raw sentiment table.
print("Sentiment Data Info:")
sentiment_df.info()
print("\nSentiment Data Head:")
print(sentiment_df.head())

# Parse the 'date' strings into a datetime 'Date' column and, in the same chain,
# discard the raw 'timestamp' and 'date' columns that 'Date' supersedes.
parsed_dates = pd.to_datetime(sentiment_df['date'])
sentiment_df = (
    sentiment_df
    .assign(Date=parsed_dates)
    .drop(columns=['timestamp', 'date'])
)

print("\nProcessed Sentiment Data Head:")
print(sentiment_df.head())

# Inspect the trader data
print("\nTrader Data Info:")
trader_df.info()
print("\nTrader Data Head:")
print(trader_df.head())

# Parse 'Timestamp IST' as Day-Month-Year Hour:Minute.
# errors='coerce' turns any value that does not match the format into NaT instead of
# raising, so unparseable rows can be counted and removed below.
trader_df['Timestamp IST'] = pd.to_datetime(
    trader_df['Timestamp IST'], format='%d-%m-%Y %H:%M', errors='coerce'
)

# Report how many rows failed to parse before they are discarded.
print(f"\nRows with invalid dates: {trader_df['Timestamp IST'].isnull().sum()}")

# Drop rows without a valid timestamp (they cannot be merged on 'Date'), then derive the
# merge key. .dt.normalize() resets the time to midnight while staying datetime64, which
# avoids the slower round trip through Python date objects (.dt.date -> pd.to_datetime).
# .assign() works on a copy, so no in-place mutation of the filtered frame is needed.
trader_df = (
    trader_df
    .dropna(subset=['Timestamp IST'])
    .assign(Date=lambda frame: frame['Timestamp IST'].dt.normalize())
)

print("\nProcessed Trader Data Head:")
print(trader_df.head())

# Merge the two dataframes on the 'Date' column.
# how='left' keeps every trade even when no sentiment row exists for its date.
# validate='many_to_one' makes pandas raise a MergeError if 'Date' is duplicated in
# sentiment_df, which would otherwise silently duplicate trade rows in the result.
merged_df = pd.merge(
    trader_df,
    sentiment_df,
    on='Date',
    how='left',
    validate='many_to_one',
)

print("\nMerged Data Info:")
merged_df.info()
print("\nMerged Data Head:")
print(merged_df.head())

# Trades whose date had no match in sentiment_df carry NaN in 'classification'.
print(f"\nTrades without sentiment data: {merged_df['classification'].isnull().sum()}")

# Save the merged dataframe to the csv_files directory.
# exist_ok=True creates the folder when it is missing and is a no-op when it already
# exists, so no manual setup or separate existence check is required.
os.makedirs('csv_files', exist_ok=True)

# index=False keeps the positional row index out of the written file.
merged_df.to_csv('csv_files/merged_trader_sentiment_data.csv', index=False)

print("\nMerged data has been successfully saved to 'csv_files/merged_trader_sentiment_data.csv'")

Trader data loaded successfully.
Sentiment data loaded successfully.
Sentiment Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2644 entries, 0 to 2643
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   timestamp       2644 non-null   int64 
 1   value           2644 non-null   int64 
 2   classification  2644 non-null   object
 3   date            2644 non-null   object
dtypes: int64(2), object(2)
memory usage: 82.8+ KB

Sentiment Data Head:
    timestamp  value classification        date
0  1517463000     30           Fear  2018-02-01
1  1517549400     15   Extreme Fear  2018-02-02
2  1517635800     40           Fear  2018-02-03
3  1517722200     24   Extreme Fear  2018-02-04
4  1517808600     11   Extreme Fear  2018-02-05

Processed Sentiment Data Head:
   value classification       Date
0     30           Fear 2018-02-01
1     15   Extreme Fear 2018-02-02
2     40           Fear 2018-02-03
3     2