### Install yfinance for Market information

In [None]:
pip install yfinance

### Import packages

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt


### Read in files

In [None]:
oil=pd.read_csv('./data/oil_dfinal.csv')

In [None]:
tech=pd.read_csv('./data/tech_dfinal.csv')

In [None]:
events=pd.read_csv('./data/final_events.csv')

### Oil Data Cleaning

In [None]:
oil['Date']=oil.Date.str[:10]

In [None]:
oil["Date"]=pd.to_datetime(oil['Date'])

In [None]:
oil.dtypes

In [None]:
oil.head(3)

In [None]:
# Columns that play no part in the price analysis below.
to_drop = ['Volume', 'Dividends', 'Stock Splits']
oil = oil.drop(to_drop, axis=1)

In [None]:
oil.head(3)

### Events Cleaning 

In [None]:
# Keep only events with Year in 2001..2020.  The explicit .copy() makes
# `events` an independent frame, so the column assignments in the following
# cells modify it directly rather than a slice of the frame read from CSV
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
in_study_period = (events['Year'] > 2000) & (events['Year'] <= 2020)
events = events.loc[in_study_period].copy()

In [None]:
events['Date']=pd.to_datetime(events['Date'])

In [None]:
events=events[events['Date'].notnull()]

In [None]:
events['Type']=events['Type'].str.replace('Domestic  Terrorism','Domestic Terrorism')


In [None]:
events['Type']=events['Type'].str.replace('Domestic Terrorism ','Domestic Terrorism')

In [None]:
index_to_drop=657

In [None]:
events=events.drop(index=index_to_drop)

In [None]:
events.Type=events['Type'].str.replace('Accident ','Accident')

In [None]:
events=events.rename(columns={'Date':'Event_date'})


In [None]:
events=events.loc[:,['Fatalities','Year','Article','Type','Location','Event_date']]

In [None]:
# Remove thousands separators (e.g. '2,996' -> '2996') so the column can be
# cast to int.  Stripping every comma, rather than patching the one known
# value, keeps the cast working if another large count appears in the data.
events['Fatalities'] = events['Fatalities'].str.replace(',', '', regex=False)

In [None]:
events.Fatalities=events.Fatalities.astype(int)

In [None]:
events['month']=events['Event_date'].dt.month

In [None]:
# Relabel the plain 'Terrorism' category as 'Int Terrorism'.
# A single .loc[row_mask, column] assignment writes into `events` itself.
# The chained form `events.Type.loc[mask] = ...` assigns through an
# intermediate Series, which is not guaranteed to update the frame and is a
# no-op when pandas Copy-on-Write is enabled.
events.loc[events['Type'] == 'Terrorism', 'Type'] = 'Int Terrorism'

In [None]:
events['Location']=events['Location'].str.replace('Sutherland Springs, Texas','Domestic')

### Create DF for before the event and after the event

In [None]:
# Before event: for every event, collect the oil rows dated within the 7 days
# up to and including the event date, tagged with that event's date and type.
rolling_window_size = '7D'
window_length = pd.Timedelta(rolling_window_size)

before_frames = []
for _, event_row in events.iterrows():
    event_date = event_row['Event_date']
    date_range_start = event_date - window_length

    in_window = (oil['Date'] >= date_range_start) & (oil['Date'] <= event_date)

    # .assign() returns a new frame, so the tag columns are never written
    # onto a filtered slice of `oil` (no SettingWithCopyWarning).
    before_frames.append(
        oil[in_window].assign(Event_date=event_date, Type=event_row['Type'])
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration; fall back to an empty frame when there are no events.
oilb = pd.concat(before_frames) if before_frames else pd.DataFrame()


In [None]:
# Renumber rows 0..n-1.  drop=True discards the old row labels instead of
# keeping them as an extra 'index' column, matching how oila, techb and techa
# are reset elsewhere in this notebook.
oilb = oilb.reset_index(drop=True)

In [None]:
# After event: for every event, collect the oil rows dated strictly after the
# event date and up to 7 days later, tagged with that event's date and type.
rolling_window_size = '7D'
window_length = pd.Timedelta(rolling_window_size)

after_frames = []
for _, event_row in events.iterrows():
    event_date = event_row['Event_date']
    date_range_end = event_date + window_length

    in_window = (oil['Date'] > event_date) & (oil['Date'] <= date_range_end)

    # .assign() returns a new frame, so the tag columns are never written
    # onto a filtered slice of `oil` (no SettingWithCopyWarning).
    after_frames.append(
        oil[in_window].assign(Event_date=event_date, Type=event_row['Type'])
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration; fall back to an empty frame when there are no events.
oila = pd.concat(after_frames) if after_frames else pd.DataFrame()
oila = oila.reset_index(drop=True)


In [None]:
# Intraday move (close minus open) for both windows, in absolute terms and
# as a percentage of the opening price.
for frame in (oilb, oila):
    intraday_move = frame['Close'] - frame['Open']
    frame['Price Change'] = intraday_move
    frame['Percent Change'] = (intraday_move / frame['Open']) * 100


# Average Percent Change

In [None]:
#oilb average percent change by type

# Use a colorblind-friendly palette
sns.set_palette("colorblind")

# Mean of the 'Percent Change' column per event type, before-event window
avg_price_change_by_type = oilb.groupby('Type')['Percent Change'].mean()

# One palette colour per bar that is actually drawn
colors = sns.color_palette("colorblind", n_colors=len(avg_price_change_by_type))

# Series.plot draws on the current figure, so this figure size is honoured
plt.figure(figsize=(10, 6))
avg_price_change_by_type.plot(kind='bar', color=colors)

# The plotted quantity is a percent change, not a price change
plt.xlabel('Event Type')
plt.ylabel('Average Percent Change (%)')
plt.title('Average Oil Percent Change Before Event')

# Show the plot
plt.tight_layout()
plt.show()


In [None]:
#oila average percent change by type

# Use a colorblind-friendly palette
sns.set_palette("colorblind")

# Mean of the 'Percent Change' column per event type, after-event window
avg_price_change_by_type = oila.groupby('Type')['Percent Change'].mean()

# One palette colour per bar that is actually drawn.  Sized from the
# after-event data being plotted, not from the before-event frame.
colors = sns.color_palette("colorblind", n_colors=len(avg_price_change_by_type))

# Series.plot draws on the current figure, so this figure size is honoured
plt.figure(figsize=(10, 6))
avg_price_change_by_type.plot(kind='bar', color=colors)

# The plotted quantity is a percent change, not a price change
plt.xlabel('Event Type')
plt.ylabel('Average Percent Change (%)')
plt.title('Average Oil Percent Change After Event')

# Show the plot
plt.tight_layout()
plt.show()

# Average Price change

In [None]:
# average prices for oilb (before event)

sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the before-event window
avg_pricing = oilb.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the size is passed here; a
# separate plt.figure(...) call beforehand only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(20, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Oil Prices Before Events')
ax.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()

# oil average price after event

In [None]:


# Set the color palette
sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the after-event window
avg_pricing = oila.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the presentation size is passed
# here; a separate plt.figure(...) call only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(30, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Oil Prices After Events')

# Move the legend to the upper right corner
ax.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()


### Lets Dive

In [None]:
### effect size of oil industry.

In [None]:




# Cohen's d per event type and price column: (after mean - before mean)
# divided by the root-mean-square of the two sample standard deviations.
measurements = ['Open', 'High', 'Low', 'Close']
grouped_oilb = oilb.groupby('Type')
grouped_oila = oila.groupby('Type')

effect_sizes_dict = {'Type': []}
effect_sizes_dict.update({measurement: [] for measurement in measurements})

for event_type, before_rows in grouped_oilb:
    after_rows = grouped_oila.get_group(event_type)
    effect_sizes_dict['Type'].append(event_type)

    for price_col in measurements:
        before_vals = before_rows[price_col]
        after_vals = after_rows[price_col]

        mean_shift = after_vals.mean() - before_vals.mean()
        rms_std = np.sqrt((before_vals.std()**2 + after_vals.std()**2) / 2)

        effect_sizes_dict[price_col].append(mean_shift / rms_std)

effect_cohen = pd.DataFrame(effect_sizes_dict)




# visualization cohen oil industry

In [None]:
# cohens effect size
# Set the color palette
sns.set_palette("colorblind")

# One row per event type, indexed by 'Type', so the bars are labelled by type
avg_effect_size_by_type = effect_cohen.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the size is passed here; a
# separate plt.figure(...) call only leaves an empty figure behind.
ax = avg_effect_size_by_type.plot(kind='barh', figsize=(22, 55))

# Horizontal bars: effect size runs along the x-axis, event types along the y-axis
ax.set_xlabel('Average Oil Effect Size')
ax.set_ylabel('Event Type')
ax.set_title('Average Oil Effect Size by Event Type')

# Show the plot.  (The stray ';' line that followed was removed: a bare
# semicolon is not a valid Python statement.)
plt.tight_layout()
plt.show()

In [None]:
# NOTE(review): with no path argument, to_csv() does not write a file; it
# returns the CSV text as a string, which the notebook simply displays.
# Pass a filename here if the table is meant to be saved.
effect_cohen.to_csv()

A negative effect size indicates that the means of the "oila" group (after the event) are lower than the means of the "oilb" group (before the event) for the columns being analyzed. In other words, prices in the "oila" group are generally lower than those in the "oilb" group.



### Hedges' g effect size for smaller sample sizes

In [None]:

# Hedges' g per event type and price column: the standardized difference
# between after-event and before-event means (negative = lower after).
grouped_oilb = oilb.groupby('Type')
grouped_oila = oila.groupby('Type')

measurements = ['Open', 'High', 'Low', 'Close']

effect_sizes_dict = {'Type': []}
for measurement in measurements:
    effect_sizes_dict[measurement] = []

for group, group_data_oilb in grouped_oilb:
    # get_group raises KeyError if a type has before-event rows but no
    # after-event rows.
    group_data_oila = grouped_oila.get_group(group)

    effect_sizes_dict['Type'].append(group)

    for measurement in measurements:
        mean_oilb = group_data_oilb[measurement].mean()
        mean_oila = group_data_oila[measurement].mean()
        std_oilb = group_data_oilb[measurement].std()
        std_oila = group_data_oila[measurement].std()
        n_oilb = len(group_data_oilb)
        n_oila = len(group_data_oila)

        # Pooled standard deviation, each variance weighted by its degrees of freedom
        pooled_std = np.sqrt(
            ((std_oilb ** 2 * (n_oilb - 1)) + (std_oila ** 2 * (n_oila - 1)))
            / (n_oilb + n_oila - 2)
        )
        cohens_d = (mean_oila - mean_oilb) / pooled_std

        # Small-sample bias correction J = 1 - 3 / (4 * (n1 + n2) - 9).
        # Without this factor the value is just a pooled-SD Cohen's d.
        correction = 1 - 3 / (4 * (n_oilb + n_oila) - 9)
        hedges_g = cohens_d * correction

        # Store the effect size for the current measurement
        effect_sizes_dict[measurement].append(hedges_g)

# Create a DataFrame from the effect_sizes_dict
effect_hedge = pd.DataFrame(effect_sizes_dict)




In [None]:
effect_hedge

In [None]:
effect_cohen

- Cohen's d and Hedges' g come out nearly identical here, which is what we expect when the sample sizes are large; the small-sample bias that Hedges' g corrects for is therefore negligible in this data frame.

### Averages by Type

In [None]:
# Mean Open/High/Low/Close per event type before the event, with 'Type' kept
# as an ordinary column.
before_oilav = oilb.groupby('Type', as_index=False)[['Open', 'High', 'Low', 'Close']].mean()

In [None]:
# Mean Open/High/Low/Close per event type after the event, with 'Type' kept
# as an ordinary column.
after_oilav = oila.groupby('Type', as_index=False)[['Open', 'High', 'Low', 'Close']].mean()

In [None]:
after_oilav

In [None]:
# Write the after-event averages to disk without the index column.
# NOTE(review): the target name has no '.csv' extension - confirm that is intended.
after_oilav.to_csv('averages_after',index=False)

In [None]:
before_oilav

* Here we can see what the effect size is telling us: the means before the event tend to be higher than the means after it, which shows up as negative effect sizes.

### Median by types 

In [None]:
# Median Open/High/Low/Close per event type for each window, with 'Type'
# kept as an ordinary column.
price_columns = ['Open', 'High', 'Low', 'Close']
after_mediano = oila.groupby('Type', as_index=False)[price_columns].median()
before_mediano = oilb.groupby('Type', as_index=False)[price_columns].median()

In [None]:
after_mediano

In [None]:
before_mediano

#### Correlation of before and after for oil

In [None]:

# Group the after-event oil rows by event type
after_oil = oila.groupby('Type')

# Close/Open correlation matrix within each type; unstack(level=0) moves the
# 'Type' level of the resulting row index into the columns.
correlation_aftero = after_oil[[ 'Close', 'Open']].corr().unstack(level=0)

# Display the table
correlation_aftero

In [None]:

# Group the before-event oil rows by event type
before_oil = oilb.groupby('Type')

# Close/Open correlation matrix within each type; unstack(level=0) moves the
# 'Type' level of the resulting row index into the columns.
correlation_beforeo = before_oil[['Close', 'Open']].corr().unstack(level=0)

# Display the before-event table computed in this cell (the cell previously
# re-displayed the after-event table).
correlation_beforeo

### Finding the p-value for Opening,High,Low,Close

In [None]:

from scipy.stats import ttest_ind

# For every price column, run a two-sample t-test of each event type's
# before-event rows against the whole before-event frame (which includes
# those same rows) and record the p-value.
columns_to_test = ['Open', 'High', 'Low', 'Close']
grouped = oilb.groupby('Type')
p_values_before = pd.DataFrame(columns=['Column', 'Type', 'P-Value'])

for price_col in columns_to_test:
    all_before = oilb[price_col]
    for event_type, type_rows in grouped:
        result = ttest_ind(type_rows[price_col], all_before)
        # Append as the next row of the results table
        p_values_before.loc[len(p_values_before)] = [price_col, event_type, result.pvalue]

# Show the results table
p_values_before


In [None]:
p_values_before.loc[p_values_before['P-Value']>=.05]

In [None]:
#p_value after
from scipy.stats import ttest_ind

# Group the after-event DataFrame by 'Type'
grouped = oila.groupby('Type')

# List of columns for which to calculate p-values
columns_to_tests = ['Open', 'High', 'Low', 'Close']

# Create an empty DataFrame to store the p-values
p_values_after = pd.DataFrame(columns=['Column', 'Type', 'P-Value'])

# Calculate p-values for each column and each group
for column in columns_to_tests:
    for group, group_data in grouped:
        group_values = group_data[column]
        # Compare the group with the whole after-event column, the same
        # comparison the before-event and tech cells make.  The previous
        # code tested each group against itself, which always yields p = 1.
        p_value = ttest_ind(group_values, oila[column]).pvalue

        p_values_after.loc[len(p_values_after)] = [column, group, p_value]

# Display the DataFrame with p-values
p_values_after


* A p-value below .05 indicates that the observed difference between groups is unlikely to be due to chance alone. Such a difference is statistically significant, so we would reject the null hypothesis.

*  If the p-value is greater than 0.05, the observed differences between the groups could plausibly have occurred due to random variation, so we would fail to reject the null hypothesis for these values.

In [None]:
oilb

### Correlation HeatMap oilb

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations of the four price columns, computed within each event type
correlation_matrix_grouped = (
    oilb.groupby('Type')[['Open','High','Low', 'Close']].corr()
)

# Draw the annotated heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix_grouped, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap for Oil Before Event Prices Grouped by Type')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations of the four price columns, computed within each event type
correlation_matrix_grouped = (
    oila.groupby('Type')[['Open', 'High', 'Low', 'Close']].corr()
)

# Draw the annotated heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix_grouped, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap for Oil After Event Prices Grouped by Type')
plt.show()


### Comparing the price change between before event and after

### visualization that shows the difference of avg price for oil

In [None]:
# Mean 'Percent Change' per event type for each window, as two-column
# frames ('Type', 'Percent Change').
percnt_changeb = oilb.groupby('Type')['Percent Change'].mean().reset_index()
percnt_changea = oila.groupby('Type')['Percent Change'].mean().reset_index()

In [None]:
percnt_changeb

In [None]:
percnt_changea

### Tech Clean frame

In [None]:
tech.head(3)

In [None]:
tech['Date']=tech.Date.str[:10]

In [None]:
tech.info()

In [None]:
tech['Date']=pd.to_datetime(tech['Date'])

In [None]:
tech.dtypes

In [None]:
# Columns that play no part in the price analysis below.
to_drop = ['Volume', 'Dividends', 'Stock Splits']
tech = tech.drop(to_drop, axis=1)

### Tech Before event has occurred 

In [None]:
# Before event: for every event, collect the tech rows dated within the
# 7 days up to and including the event date, tagged with the event's date
# and type.
rolling_window_size = '7D'
window_length = pd.Timedelta(rolling_window_size)

before_frames = []

# Iterate through each historical event
for _, event_row in events.iterrows():
    event_date = event_row['Event_date']

    # Start of the 7-day window before the event
    date_range_start = event_date - window_length

    # Select the tech rows inside the window
    in_window = (tech['Date'] >= date_range_start) & (tech['Date'] <= event_date)

    # .assign() returns a new frame, so the tag columns are never written
    # onto a filtered slice of `tech` (no SettingWithCopyWarning).
    before_frames.append(
        tech[in_window].assign(Event_date=event_date, Type=event_row['Type'])
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration; fall back to an empty frame when there are no events.
techb = pd.concat(before_frames) if before_frames else pd.DataFrame()

In [None]:
techb=techb.reset_index(drop=True)

In [None]:
techb

### Tech after

In [None]:
# After event: for every event, collect the tech rows dated strictly after
# the event date and up to 7 days later, tagged with the event's date and type.
rolling_window_size = '7D'
window_length = pd.Timedelta(rolling_window_size)

after_frames = []

# Iterate through each historical event
for _, event_row in events.iterrows():
    event_date = event_row['Event_date']

    # End of the 7-day window after the event
    date_range_end = event_date + window_length

    # Both bounds must be tested against tech['Date'].  The upper bound was
    # previously tested against oil['Date'], a column of a different frame.
    in_window = (tech['Date'] > event_date) & (tech['Date'] <= date_range_end)

    # .assign() returns a new frame, so the tag columns are never written
    # onto a filtered slice of `tech` (no SettingWithCopyWarning).
    after_frames.append(
        tech[in_window].assign(Event_date=event_date, Type=event_row['Type'])
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration; fall back to an empty frame when there are no events.
techa = pd.concat(after_frames) if after_frames else pd.DataFrame()

# Reset index of the resulting DataFrame
techa = techa.reset_index(drop=True)



In [None]:
techa

In [None]:
techb

In [None]:
# Intraday move for the before-event tech rows: absolute, and as a
# percentage of the opening price.
tech_before_move = techb['Close'] - techb['Open']
techb['Price Change'] = tech_before_move
techb['Percent Change'] = (tech_before_move / techb['Open']) * 100


In [None]:
# Intraday move for the after-event tech rows: absolute, and as a
# percentage of the opening price.
tech_after_move = techa['Close'] - techa['Open']
techa['Price Change'] = tech_after_move
techa['Percent Change'] = (tech_after_move / techa['Open']) * 100


## average price  tech

In [None]:
# average prices for techb (before event)

sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the before-event window
avg_pricing = techb.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the size is passed here; a
# separate plt.figure(...) call beforehand only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(20, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Tech Prices Before Events')
ax.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
# average prices for techa (after event)

sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the after-event window
avg_pricing = techa.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the size is passed here; a
# separate plt.figure(...) call beforehand only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(20, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Tech Prices After Events')
ax.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()

## correlation tech before and after

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations of the four price columns, computed within each event type
correlation_matrix_grouped = (
    techb.groupby('Type')[['Open','High','Low', 'Close']].corr()
)

# Draw the annotated heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix_grouped, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap for Tech Before Event Prices Grouped by Type')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations of the four price columns, computed within each event type
correlation_matrix_grouped = (
    techa.groupby('Type')[['Open','High','Low', 'Close']].corr()
)

# Draw the annotated heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix_grouped, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap for Tech After Event Prices Grouped by Type')
plt.show()


In [None]:
## effect size 

In [None]:
techb.head(2)

In [None]:
techa.head(2)

1. Retrieve the industry averages, grouped by event type, for the 5-7 days before each event occurred.

In [None]:
# Mean Open/High/Low/Close per event type before the event (indexed by 'Type')
before_techav = techb.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()


In [None]:
# Mean Open/High/Low/Close per event type after the event (indexed by 'Type')
after_techav = techa.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

In [None]:
after_techav

In [None]:
before_techav

In [None]:
# Mean 'Percent Change' per event type for each industry and window, as
# two-column frames ('Type', 'Percent Change').
prct_techb = techb.groupby('Type')['Percent Change'].mean().reset_index()
prct_techa = techa.groupby('Type')['Percent Change'].mean().reset_index()
prct_oilb = oilb.groupby('Type')['Percent Change'].mean().reset_index()
prct_oila = oila.groupby('Type')['Percent Change'].mean().reset_index()

### Cohen effect size of tech industry

In [None]:
# Cohen's d for the tech industry, per event type and price column:
# (after mean - before mean) divided by the root-mean-square of the two
# sample standard deviations.
grouped_techb = techb.groupby('Type')
grouped_techa = techa.groupby('Type')
measurements = ['Open', 'High', 'Low', 'Close']
effect_sizes_dict = {'Type': []}

for measurement in measurements:
    effect_sizes_dict[measurement] = []


for group, group_data_techb in grouped_techb:
    # get_group raises KeyError if a type has before-event rows but no
    # after-event rows.
    group_data_techa = grouped_techa.get_group(group)

    effect_sizes_dict['Type'].append(group)

    for measurement in measurements:
        mean_techb = group_data_techb[measurement].mean()
        mean_techa = group_data_techa[measurement].mean()
        pooled_std = np.sqrt((group_data_techb[measurement].std()**2 + group_data_techa[measurement].std()**2) / 2)

        # Use the tech means computed just above.  The numerator previously
        # read mean_oila / mean_oilb, leftovers from the oil cells, so every
        # tech effect size was built from a stale oil difference.
        effect_size = (mean_techa - mean_techb) / pooled_std
        effect_sizes_dict[measurement].append(effect_size)
effect_cohentech = pd.DataFrame(effect_sizes_dict)




In [None]:
effect_cohentech

In [None]:
# cohens effect size tech
# Set the color palette
sns.set_palette("colorblind")

# One row per event type, indexed by 'Type', so the bars are labelled by type
avg_effect_size_by_type = effect_cohentech.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the size is passed here; a
# separate plt.figure(...) call only leaves an empty figure behind.
ax = avg_effect_size_by_type.plot(kind='barh', figsize=(22, 55))

# Horizontal bars: effect size runs along the x-axis, event types along the y-axis
ax.set_xlabel('Average Tech Effect Size')
ax.set_ylabel('Event Type')
ax.set_title('Average Tech Effect Size by Event Type')

# Show the plot.  (The stray ';' line that followed was removed: a bare
# semicolon is not a valid Python statement.)
plt.tight_layout()
plt.show()

In [None]:
### compare effect size of both industries open

In [None]:

# Side-by-side tech and oil effect sizes for 'Open', one pair of bars per
# row of the effect-size tables.
types = effect_cohentech['Type']
bar_width = 0.35
ind = np.arange(len(types))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(ind, effect_cohentech['Open'], width=bar_width, label='Tech', color='green')
ax.bar(ind + bar_width, effect_cohen['Open'], width=bar_width, label='Oil', color='magenta')

# Axis text; ticks sit midway between the two bars of each pair
ax.set_xlabel('Type')
ax.set_ylabel('Effect Size (Open)')
ax.set_title('Effect Size Comparison by Industry and Type (Open)')
ax.set_xticks(ind + bar_width / 2)
ax.set_xticklabels(types, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()


In [None]:
### compare effect size of industry based on closing price

In [None]:
# Side-by-side tech and oil effect sizes for 'Close', one pair of bars per
# row of the effect-size tables.
types = effect_cohentech['Type']
bar_width = 0.35
ind = np.arange(len(types))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(ind, effect_cohentech['Close'], width=bar_width, label='Tech', color='green')
ax.bar(ind + bar_width, effect_cohen['Close'], width=bar_width, label='Oil', color='magenta')

# Axis text; ticks sit midway between the two bars of each pair
ax.set_xlabel('Type')
ax.set_ylabel('Effect Size (Close)')
ax.set_title('Effect Size Comparison by Industry and Type (Close)')
ax.set_xticks(ind + bar_width / 2)
ax.set_xticklabels(types, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Side-by-side tech and oil effect sizes for 'High', one pair of bars per
# row of the effect-size tables.
types = effect_cohentech['Type']
bar_width = 0.35
ind = np.arange(len(types))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(ind, effect_cohentech['High'], width=bar_width, label='Tech', color='green')
ax.bar(ind + bar_width, effect_cohen['High'], width=bar_width, label='Oil', color='magenta')

# Axis text; ticks sit midway between the two bars of each pair
ax.set_xlabel('Type')
ax.set_ylabel('Effect Size (High)')
ax.set_title('Effect Size Comparison by Industry and Type (High)')
ax.set_xticks(ind + bar_width / 2)
ax.set_xticklabels(types, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Side-by-side tech and oil effect sizes for 'Low', one pair of bars per
# row of the effect-size tables.
types = effect_cohentech['Type']
bar_width = 0.35
ind = np.arange(len(types))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(ind, effect_cohentech['Low'], width=bar_width, label='Tech', color='green')
ax.bar(ind + bar_width, effect_cohen['Low'], width=bar_width, label='Oil', color='magenta')

# Axis text; ticks sit midway between the two bars of each pair
ax.set_xlabel('Type')
ax.set_ylabel('Effect Size (Low)')
ax.set_title('Effect Size Comparison by Industry and Type (Low)')
ax.set_xticks(ind + bar_width / 2)
ax.set_xticklabels(types, rotation=45)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
oilb

In [None]:
oila

In [None]:
techb

In [None]:
techa

In [None]:
effect_cohen

In [None]:
effect_cohentech

In [None]:

from scipy.stats import ttest_ind

# For every price column, run a two-sample t-test of each event type's
# before-event tech rows against the whole before-event frame (which
# includes those same rows) and record the p-value.
columns_to_test = ['Open', 'High', 'Low', 'Close']
grouped = techb.groupby('Type')
p_values_techb = pd.DataFrame(columns=['Column', 'Type', 'P-Value'])

for price_col in columns_to_test:
    all_before = techb[price_col]
    for event_type, type_rows in grouped:
        result = ttest_ind(type_rows[price_col], all_before)
        # Append as the next row of the results table
        p_values_techb.loc[len(p_values_techb)] = [price_col, event_type, result.pvalue]

# Display the DataFrame with p-values



In [None]:
p_values_techb

In [None]:
from scipy.stats import ttest_ind

# For every price column, run a two-sample t-test of each event type's
# after-event tech rows against the whole after-event frame (which includes
# those same rows) and record the p-value.
columns_to_test = ['Open', 'High', 'Low', 'Close']
grouped = techa.groupby('Type')
p_values_techa = pd.DataFrame(columns=['Column', 'Type', 'P-Value'])

for price_col in columns_to_test:
    all_after = techa[price_col]
    for event_type, type_rows in grouped:
        result = ttest_ind(type_rows[price_col], all_after)
        # Append as the next row of the results table
        p_values_techa.loc[len(p_values_techa)] = [price_col, event_type, result.pvalue]



In [None]:
p_values_techa

In [None]:
# Mean Open/High/Low/Close per event type before the event, with 'Type'
# kept as an ordinary column.
av_techb = techb.groupby('Type', as_index=False)[['Open', 'High', 'Low', 'Close']].mean()

In [None]:
# Mean Open/High/Low/Close per event type after the event, with 'Type'
# kept as an ordinary column.
av_tercha = techa.groupby('Type', as_index=False)[['Open', 'High', 'Low', 'Close']].mean()

In [None]:
av_techb

In [None]:
av_tercha

In [None]:
# Hedges' g for the tech industry, per event type and price column: the
# standardized difference between after-event and before-event means.
# Both groupings are built in this cell so it does not depend on a groupby
# object left over from an earlier cell.
grouped_techb = techb.groupby('Type')
grouped_techa = techa.groupby('Type')

measurements = ['Open', 'High', 'Low', 'Close']

effect_sizes_dict = {'Type': []}

for measurement in measurements:
    effect_sizes_dict[measurement] = []

for group, group_data_techb in grouped_techb:
    # get_group raises KeyError if a type has before-event rows but no
    # after-event rows.
    group_data_techa = grouped_techa.get_group(group)

    effect_sizes_dict['Type'].append(group)

    for measurement in measurements:
        mean_techb = group_data_techb[measurement].mean()
        mean_techa = group_data_techa[measurement].mean()
        std_techb = group_data_techb[measurement].std()
        std_techa = group_data_techa[measurement].std()
        n_techb = len(group_data_techb)
        n_techa = len(group_data_techa)

        # Pooled standard deviation, each variance weighted by its degrees of freedom
        pooled_std = np.sqrt(
            ((std_techb ** 2 * (n_techb - 1)) + (std_techa ** 2 * (n_techa - 1)))
            / (n_techb + n_techa - 2)
        )
        cohens_d = (mean_techa - mean_techb) / pooled_std

        # Small-sample bias correction J = 1 - 3 / (4 * (n1 + n2) - 9).
        # Without this factor the value is just a pooled-SD Cohen's d.
        correction = 1 - 3 / (4 * (n_techb + n_techa) - 9)
        hedges_g = cohens_d * correction

        # Store the effect size for the current measurement
        effect_sizes_dict[measurement].append(hedges_g)

# Create a DataFrame from the effect_sizes_dict.
# NOTE(review): this rebinds `effect_hedge`, which held the oil table from
# the earlier Hedges cell - consider a tech-specific name.
effect_hedge = pd.DataFrame(effect_sizes_dict)


In [None]:
effect_hedge

In [None]:
# Set the color palette
sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the after-event window
avg_pricing = techa.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the presentation size is passed
# here; a separate plt.figure(...) call only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(30, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Tech Prices After Events')

# Move the legend to the upper right corner
ax.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
# Set the color palette
sns.set_palette("colorblind")

# Mean Open/High/Low/Close per event type in the before-event window
avg_pricing = techb.groupby('Type')[['Open', 'High', 'Low', 'Close']].mean()

# DataFrame.plot creates its own figure, so the presentation size is passed
# here; a separate plt.figure(...) call only leaves an empty figure behind.
ax = avg_pricing.plot(kind='barh', figsize=(30, 15))

# Horizontal bars: prices run along the x-axis, event types along the y-axis
ax.set_xlabel('Average Price')
ax.set_ylabel('Event Type')
ax.set_title('Average Tech Prices Before Events')

# Move the legend to the upper right corner
ax.legend(loc='upper right')

plt.tight_layout()
plt.show()

In [None]:
# open close comparison average price