# Part 3

### Import the relevant packages

In [1]:
import numpy as np
import pandas as pd
import os

### Import the data and concatenate all the files so we can work with one file

In [2]:
folder_path = '/content/sample_data/QQQ'

options_dfs = []

for filename in os.listdir(folder_path):
    if ('2022' not in filename) and ('2023' not in filename):
        continue
    file_path = os.path.join(folder_path, filename)

    options_df = pd.read_csv(file_path, low_memory=False)

    options_dfs.append(options_df)

options_df = pd.concat(options_dfs, ignore_index=True)

# fix column names by removing [, ], " " characters
options_df.columns = options_df.columns.str.strip('[] ')

# Define data types for all columns
for col in options_df.columns:
    if col.startswith('C_') or col.startswith('P_'):
        options_df[col] = pd.to_numeric(options_df[col], errors='coerce')

dtypes = {
    'QUOTE_UNIXTIME': 'int64',
    'QUOTE_READTIME': 'datetime64[ns]',
    'QUOTE_DATE': 'datetime64[ns]',
    'QUOTE_TIME_HOURS': 'float64',
    'UNDERLYING_LAST': 'float64',
    'EXPIRE_DATE': 'datetime64[ns]',
    'EXPIRE_UNIX': 'int64',
    'DTE': 'float64',
    'C_DELTA': 'float64',
    'C_GAMMA': 'float64',
    'C_VEGA': 'float64',
    'C_THETA': 'float64',
    'C_RHO': 'float64',
    'C_IV': 'float64',
    'C_VOLUME': 'float64',
    'C_LAST': 'float64',
    'C_SIZE': 'float64',
    'C_BID': 'float64',
    'C_ASK': 'float64',
    'STRIKE': 'float64',
    'P_BID': 'float64',
    'P_ASK': 'float64',
    'P_SIZE': 'float64',
    'P_LAST': 'float64',
    'P_DELTA': 'float64',
    'P_VOLUME': 'float64',
    'STRIKE_DISTANCE': 'float64',
    'STRIKE_DISTANCE_PCT': 'float64'
}

# Convert columns to the specified data types
options_df = options_df.astype(dtypes)

### Leave only the relevant rows

In [3]:
options_df = options_df[(options_df['C_VOLUME'] >= 100)  & (options_df['P_VOLUME'] >= 100)]

### Define our profit, and what we should do to make it

In [10]:
options_df['Profit'] = (options_df['C_LAST'] + options_df['STRIKE'] - options_df['UNDERLYING_LAST'] - options_df['P_LAST']).abs()

In [11]:
max_profit_df = options_df.groupby('QUOTE_DATE').max('Profit')
max_profit_df['Action'] =  np.where(
    (max_profit_df['C_LAST'] + max_profit_df['STRIKE'] - max_profit_df['UNDERLYING_LAST'] > max_profit_df['P_LAST']), 'Buy Put', 'Buy Call')
max_profit_df.sort_values(by = 'Profit', ascending = False).head(5)[['C_LAST', 'P_LAST', 'UNDERLYING_LAST', 'STRIKE', 'Profit', 'Action']]

Unnamed: 0_level_0,C_LAST,P_LAST,UNDERLYING_LAST,STRIKE,Profit,Action
QUOTE_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-22,51.89,32.64,306.07,325.0,23.18,Buy Put
2023-12-12,61.33,35.07,398.75,430.0,21.87,Buy Put
2023-03-01,47.2,48.73,291.16,340.0,20.39,Buy Put
2023-05-31,54.66,43.5,347.66,375.0,20.14,Buy Put
2023-08-07,58.98,24.45,375.18,390.0,19.35,Buy Put


### As we can see, these are to top 5 days in terms of profit.
### The full description of what we should do to make our profit is in the WORD file.  

In [None]:
max_profit_df.Profit.sum() * 100

226194.99999999997

### The over-all profit we suppose to do in terms of 100 units-deals