In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

In [2]:
# Mount Google Drive so files under /content/drive become readable (Colab-only API).
# NOTE(review): the output recorded for this cell is "ValueError: mount failed" —
# confirm the mount succeeds before relying on any /content/drive path below.
from google.colab import drive
drive.mount('/content/drive')

ValueError: mount failed

In [3]:
from google.colab import drive
import os

# The mount call is intentionally left disabled here.
# NOTE(review): presumably Drive is expected to be mounted already — confirm,
# otherwise os.listdir below fails with FileNotFoundError.
#drive.mount('/content/drive')

# Folder in Google Drive that holds the exported CSV files
data_folder = '/content/drive/MyDrive/Real_Trader_EDA_R1/Data'

# List the folder contents to confirm the data is reachable.
# The folder is used directly: joining it with the same absolute path again
# (as was done before) is a no-op, because os.path.join discards every earlier
# component once it meets an absolute one.
folder_path = data_folder
file_list = os.listdir(folder_path)
file_list

In [4]:
# CSV exports of the ES 09-24 contract at 5-, 60- and 240-minute bar sizes.
# The three paths differ only in the bar-size suffix, so they are built from one template.
file_path_5min, file_path_60min, file_path_240min = (
    f'/content/drive/MyDrive/Real_Trader_EDA_R1/Data/ES 09-24_Minute_{bar_minutes}.csv'
    for bar_minutes in (5, 60, 240)
)

In [5]:
def preprocess_files(file_path):
    """Read one bar-data CSV and convert its time columns to datetimes.

    Parameters
    ----------
    file_path : str
        Location of the CSV file. The file must contain the columns
        'Time Stamp', 'BuyersInControlTime' and 'SellersInControlTime'.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame with 'Time Stamp' parsed by ``pd.to_datetime`` and
        the two control-time columns parsed with the '%m/%d/%Y %H:%M' format.
        Control-time values that cannot be parsed (or are missing) end up as
        ``pd.Timestamp(0)``, i.e. 1970-01-01 00:00:00, which acts as a
        "no value" sentinel. ``None`` is returned, after printing a message,
        when the file does not exist.
    """
    try:
        bars = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

    bars['Time Stamp'] = pd.to_datetime(bars['Time Stamp'])

    # Both control-time columns get the same treatment: strict-format parse,
    # unparseable entries coerced to NaT, then NaT replaced by the epoch sentinel.
    for control_col in ('BuyersInControlTime', 'SellersInControlTime'):
        parsed = pd.to_datetime(bars[control_col], format='%m/%d/%Y %H:%M', errors='coerce')
        bars[control_col] = parsed.fillna(pd.Timestamp(0))

    return bars

In [6]:
# Show every column when a wide frame is displayed (global pandas option).
pd.set_option('display.max_columns', None)

# Load the raw 5-minute bars and preview the first rows.
# Only the last expression of a cell is rendered, so a single head() call is enough.
ES_5min = pd.read_csv(file_path_5min)
ES_5min.head()

In [7]:
# Structural overview of the raw frame: dimensions, per-column dtypes,
# and the number of missing values in each column.
print(ES_5min.shape, ES_5min.dtypes, ES_5min.isna().sum(), sep='\n')

In [8]:
# Parse the bar timestamp, letting pandas infer the format.
ES_5min['Time Stamp'] = pd.to_datetime(ES_5min['Time Stamp'])

# Parse both control-time columns with the explicit month/day/year format;
# values that do not match become NaT instead of raising.
ES_5min[['BuyersInControlTime', 'SellersInControlTime']] = (
    ES_5min[['BuyersInControlTime', 'SellersInControlTime']]
    .apply(pd.to_datetime, format='%m/%d/%Y %H:%M', errors='coerce')
)

In [9]:
# Replace NaT with pd.Timestamp(0), i.e. 1970-01-01 00:00:00 (the Unix epoch),
# so the control-time columns contain no missing values.
ES_5min[['BuyersInControlTime', 'SellersInControlTime']] = (
    ES_5min[['BuyersInControlTime', 'SellersInControlTime']].fillna(pd.Timestamp(0))
)

In [None]:
# Inspect the column names of the 5-minute frame.
ES_5min.columns

In [None]:
# Print dtypes, non-null counts and memory usage for the 5-minute frame.
ES_5min.info()

In [10]:
# Create a Sample from the dataset to visualize the patterns
# .loc slices by index label and includes both endpoints, so this keeps labels 0 through 200000
# (up to 200,001 rows, assuming the default integer index produced by read_csv).
ES_5min_s = ES_5min.loc[0:200000,:]
ES_5min_s.shape

In [11]:
# Line plots identify trends, peaks, and troughs in the market over time (5-minute bars).
# One trace per price column, each with its own colour, all sharing the timestamp axis.
fig = go.Figure(data=[
    go.Scatter(
        x=ES_5min_s['Time Stamp'],
        y=ES_5min_s[price_col],
        mode='lines',
        name=price_col,
        line=dict(color=line_color, width=2),
    )
    for price_col, line_color in (
        ('Open', 'Blue'),
        ('High', 'orange'),
        ('Low', 'Red'),
        ('Close', 'green'),
    )
])

fig.update_layout(title='Price Time Series', xaxis_title='Time', yaxis_title='Price')
fig.show()

In [12]:
# Keep every bar stamped at or before 2019-06-24 00:00.
# NOTE(review): the name suggests a single day, but the filter has no lower bound —
# confirm the data starts close enough to the cutoff for this to be one session.
ES_5min_1day = ES_5min[ES_5min['Time Stamp']<="2019-06-24"]
# A bare expression in the middle of a cell is discarded, so print the shape explicitly.
print(ES_5min_1day.shape)

# The candlestick chart provides a more detailed visualization of the price movements over time,
# highlighting the opening, closing, high, and low prices for each time period.
fig = go.Figure(data=[go.Candlestick(x=ES_5min_1day['Time Stamp'],
                                     open=ES_5min_1day['Open'],
                                     high=ES_5min_1day['High'],
                                     low=ES_5min_1day['Low'],
                                     close=ES_5min_1day['Close'])])
fig.update_layout(title='Candlestick Chart',
                  xaxis_title='Time',
                  yaxis_title='Price')
fig.show()

In [None]:
# Calculate the correlation matrix over numeric columns only.
# The frame also holds datetime columns ('Time Stamp', 'BuyersInControlTime',
# 'SellersInControlTime'); since pandas 2.0 DataFrame.corr() no longer drops
# non-numeric columns by default, so the restriction is requested explicitly.
corr_matrix = ES_5min.corr(numeric_only=True)

# Visualize the correlation matrix using a heatmap
plt.figure(figsize=(14, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Pairwise correlation between the four trade-level columns, shown as an annotated heatmap.
# NOTE(review): no earlier cell visibly adds 'Entry', 'Defence', 'Stop' or 'Target' to ES_5min —
# confirm they exist in the raw CSV; otherwise this cell raises KeyError on a fresh kernel.
correlation_matrix = ES_5min[['Entry', 'Defence', 'Stop', 'Target']].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Distribution Analysis
# Distribution of Open, High, Low, Close prices
# DataFrame.plot(subplots=True) creates its own figure (sized by figsize below), so no
# separate plt.figure() call is made — one would only render as an extra empty figure.
ES_5min[['Open', 'High', 'Low', 'Close']].plot(kind='density', subplots=True, layout=(2, 2), sharex=False, figsize=(14, 10))
plt.suptitle('Density Plots of Open, High, Low, Close Prices')
plt.show()

In [None]:
# Resample to daily frequency and plot Close prices.
# Only numeric columns are averaged: the control-time columns hold datetimes
# (including the 1970-01-01 fill value), for which a daily mean is meaningless.
# Note that the plotted value is the mean of the intraday closes for each calendar
# day, and days without bars yield NaN (gaps in the line).
daily_data = ES_5min.resample('D', on='Time Stamp').mean(numeric_only=True)
plt.figure(figsize=(14, 7))
plt.plot(daily_data.index, daily_data['Close'])
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('Daily Close Prices')
plt.show()

In [None]:
# Pairwise correlation between the four trade-level columns, shown as an annotated heatmap.
# NOTE(review): this cell repeats an identical heatmap cell earlier in the notebook — consider removing one.
# NOTE(review): no earlier cell visibly adds 'Entry', 'Defence', 'Stop' or 'Target' to ES_5min —
# confirm they exist in the raw CSV; otherwise this cell raises KeyError on a fresh kernel.
correlation_matrix = ES_5min[['Entry', 'Defence', 'Stop', 'Target']].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the four trade-level columns.
# NOTE(review): requires 'Entry', 'Defence', 'Stop' and 'Target' to already be present in ES_5min — confirm.
summary_stats = ES_5min[['Entry', 'Defence', 'Stop', 'Target']].describe()
print(summary_stats)

In [None]:
# Re-check the column names before building the trade-level features.
ES_5min.columns

### **Building logic for generating Entry, Defence, Stop and Target**

---



In [None]:
# Show 10 randomly chosen rows (no random_state is set, so the rows differ between runs).
ES_5min.sample(10)

In [None]:
# Test_data is bound to the same DataFrame object as ES_5min (no copy is made),
# so any in-place change made through Test_data is also visible through ES_5min.
Test_data = ES_5min

In [None]:
# Dimensions (rows, columns) of Test_data.
Test_data.shape

In [None]:
import pandas as pd

def generate_trading_features(df, target_offset=2, defence_offset=1, stop_offset=2):
    """Add 'Entry', 'Defence', 'Stop' and 'Target' columns derived from the in-control prices.

    For each row the side is chosen with buyer priority:

    * buyer row  – 'BuyersInControlPrice' is present and non-zero:
      Entry = price, Defence = price - defence_offset,
      Stop = price - stop_offset, Target = price + target_offset.
    * seller row – otherwise, if 'SellersInControlPrice' is present and non-zero:
      Entry = price, Defence = price + defence_offset,
      Stop = price + stop_offset, Target = price - target_offset.
    * neither    – all four columns are 0.

    When both prices are set the buyer values are used for every column,
    including 'Target' (previously 'Target' was the sum of the buyer and the
    seller target in that case, which disagreed with the other three columns).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'BuyersInControlPrice' and 'SellersInControlPrice'.
        The frame is modified in place; existing columns with the four output
        names are overwritten.
    target_offset, defence_offset, stop_offset : number, optional
        Distances in price units from the entry to the target, defence and
        stop levels. Defaults are 2, 1 and 2.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the four columns added.
        The columns are also created when the frame has no rows.

    Raises
    ------
    KeyError
        If either in-control price column is missing.
    """
    buyers = df['BuyersInControlPrice']
    sellers = df['SellersInControlPrice']

    # A side is "active" when its price is neither missing nor zero.
    has_buyer = buyers.notna() & (buyers != 0)
    has_seller = sellers.notna() & (sellers != 0)

    def _pick(buyer_values, seller_values):
        # Buyer value where a buyer price exists, else seller value where a
        # seller price exists, else 0.
        return buyer_values.where(has_buyer, seller_values.where(has_seller, 0))

    # Column order matches the historical output: Entry, Defence, Stop, Target.
    df['Entry'] = _pick(buyers, sellers)
    df['Defence'] = _pick(buyers - defence_offset, sellers + defence_offset)
    df['Stop'] = _pick(buyers - stop_offset, sellers + stop_offset)
    df['Target'] = _pick(buyers + target_offset, sellers - target_offset)

    return df



In [None]:
# Keep only the price, level and in-control columns needed for feature generation.
# An explicit copy is taken because generate_trading_features writes new columns
# into the frame it receives; without it those writes would target a selection of
# ES_5min and trigger pandas' SettingWithCopyWarning.
Test_data = ES_5min[['Time Stamp', 'Open', 'High', 'Low', 'Close', 'SellersLevel4',
                'SellersLevel3', 'SellersLevel2', 'SellersLevel1', 'SellersLevel0',
                'BuyersLevel0', 'BuyersLevel1', 'BuyersLevel2', 'BuyersLevel3',
                'BuyersLevel4', 'BuyersInControlTime', 'BuyersInControlPrice',
                'SellersInControlTime', 'SellersInControlPrice']].copy()

Test_data = generate_trading_features(Test_data)

# Display a sample of the resulting DataFrame
Test_data.sample(10)

In [None]:
# Persist the feature frame to Drive. The row index is written as an unnamed first column,
# which the reload cell below reads back as 'Unnamed: 0' and drops.
Test_data.to_csv('/content/drive/MyDrive/Real_Trader_EDA_R1/Data/New_5min_data.csv')

### **Data Visualization Using a Candlestick Chart**

In [None]:
# Reload the saved feature file and discard the 'Unnamed: 0' column
# (the row index that was written out when the file was saved).
New_5min = (
    pd.read_csv('/content/drive/MyDrive/Real_Trader_EDA_R1/Data/New_5min_data.csv')
    .drop(columns=['Unnamed: 0'])
)
New_5min.sample(10)

In [None]:
# Keep every bar stamped at or before 2019-06-24 00:00.
# 'Time Stamp' comes back from read_csv as text, so it is parsed before comparing;
# comparing the raw strings would be a lexicographic test, not a chronological one.
New_5min = New_5min[pd.to_datetime(New_5min['Time Stamp']) <= pd.Timestamp("2019-06-24")]
# A bare expression in the middle of a cell is discarded, so print the shape explicitly.
print(New_5min.shape)

# The candlestick chart provides a more detailed visualization of the price movements over time,
# highlighting the opening, closing, high, and low prices for each time period.
fig = go.Figure(data=[go.Candlestick(x=New_5min['Time Stamp'],
                                     open=New_5min['Open'],
                                     high=New_5min['High'],
                                     low=New_5min['Low'],
                                     close=New_5min['Close'])])
fig.update_layout(title='Candlestick Chart',
                  xaxis_title='Time',
                  yaxis_title='Price')
fig.show()