In [8]:
import pandas as pd


csv_file_path = 'Data/Goldman_Sachs_Trends.csv'
output_file_path = 'Data/G_S_Peaks.csv'

df = pd.read_csv(csv_file_path)

df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%dT%H')

# Filter the data to keep rows where the value is greater than its left and right neighbors
filtered_df = df[(df['Goldman Sachs'] > df['Goldman Sachs'].shift(1)) & (df['Goldman Sachs'] > df['Goldman Sachs'].shift(-1))]

print(filtered_df)

                   Time  Goldman Sachs
3   2023-10-29 01:00:00             26
8   2023-10-29 06:00:00             25
12  2023-10-29 10:00:00             26
16  2023-10-29 14:00:00             25
18  2023-10-29 16:00:00             27
22  2023-10-29 20:00:00             30
27  2023-10-30 01:00:00             37
30  2023-10-30 04:00:00             31
37  2023-10-30 11:00:00             51
40  2023-10-30 14:00:00             52
46  2023-10-30 20:00:00             36
49  2023-10-30 23:00:00             41
54  2023-10-31 04:00:00             29
59  2023-10-31 09:00:00             50
62  2023-10-31 12:00:00             54
64  2023-10-31 14:00:00             52
72  2023-10-31 22:00:00             41
75  2023-11-01 01:00:00             39
78  2023-11-01 04:00:00             36
87  2023-11-01 13:00:00             60
89  2023-11-01 15:00:00             60
94  2023-11-01 20:00:00             43
97  2023-11-01 23:00:00             43
99  2023-11-02 01:00:00             43
101 2023-11-02 03:00:00  

In [10]:
peak_indices = df.index[df['Goldman Sachs'] > df['Goldman Sachs'].shift(1)]

differences = {}

for peak_index in peak_indices:
    peak_value = df.at[peak_index, 'Goldman Sachs']
    previous_value = df.at[peak_index - 1, 'Goldman Sachs']
    next_value = df.at[peak_index + 1, 'Goldman Sachs']

    diff_to_previous = peak_value - previous_value
    diff_to_next = peak_value - next_value
    

    abs_diff_to_previous = abs(diff_to_previous)
    abs_diff_to_next = abs(diff_to_next)
    
    differences[peak_index] = {
        'AbsoluteDifferenceToPrevious': abs_diff_to_previous,
        'AbsoluteDifferenceToNext': abs_diff_to_next
    }

differences_df = pd.DataFrame.from_dict(differences, orient='index')

count_abs_diff_to_previous = differences_df['AbsoluteDifferenceToPrevious'].value_counts()
count_abs_diff_to_next = differences_df['AbsoluteDifferenceToNext'].value_counts()

tally_df = pd.DataFrame({
    'Count Absolute_Difference_To_Previous': count_abs_diff_to_previous,
    'Count Absolute_Difference_To_Next': count_abs_diff_to_next
})

print(tally_df)

    Count Absolute_Difference_To_Previous  Count Absolute_Difference_To_Next
0                                     NaN                                4.0
1                                    10.0                               13.0
2                                    22.0                               15.0
3                                    10.0                               15.0
4                                    10.0                                5.0
5                                    11.0                                7.0
6                                     3.0                                5.0
7                                     3.0                                2.0
8                                     3.0                                4.0
9                                     2.0                                3.0
10                                    NaN                                1.0
11                                    1.0                                2.0

In [12]:
# Convert the dictionary to a pandas DataFrame
differences_df = pd.DataFrame.from_dict(differences, orient='index')

filtered_peaks_df = filtered_df.loc[
    (differences_df['AbsoluteDifferenceToPrevious'] >= 4) &
    (differences_df['AbsoluteDifferenceToNext'] >= 4)
]

print(filtered_peaks_df)


                   Time  Goldman Sachs
59  2023-10-31 09:00:00             50
75  2023-11-01 01:00:00             39
101 2023-11-02 03:00:00             44
107 2023-11-02 09:00:00             68
112 2023-11-02 14:00:00            100
120 2023-11-02 22:00:00             51
129 2023-11-03 07:00:00             59
137 2023-11-03 15:00:00             55
148 2023-11-04 02:00:00             40
163 2023-11-04 17:00:00             24


In [14]:
filtered_peaks_df.to_csv(output_file_path, index=False)