In [310]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [311]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests
import lxml
import re

## Meeting Plan for Nicolas Ouellet and Benedicte Knudson:
We colaborated over Zoom and in person **four** times and used a shared Github Repository that can be found [here](https://github.com/NickOuellet/Fly-Me-To-The-Moon). <br />
We met: <br />
10/11: 5pm-7pm <br />
10/20: 10am-12pm <br />
10/21: 7-11pm <br />
10/25: 7-10pm <br />


___


## Step 1 - Scrape SpaceWeatherLive.com

In [312]:
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r = requests.get("https://www.spaceweatherlive.com/en/solar-activity/top-50-solar-flares.html",timeout=10,headers=headers)
r.status_code

200

In [313]:
text_response = r.content #https://stackoverflow.com/questions/52389692/beautifulsoup-and-prettify-function
soup = BeautifulSoup(text_response,"html.parser")

In [314]:
table = soup.findAll("table") #find the table in the HTML

In [315]:
solar_flare_df = pd.read_html(str(table))

In [316]:
solfla_df = solar_flare_df[0]#works when we just select the first table

In [317]:
solfla_df.columns = ["rank", "x_class", "date", "region", "start_time", "maximum_time", "end_time", "movie"] #rename columns to be more descriptive

In [318]:
solfla_df

Unnamed: 0,rank,x_class,date,region,start_time,maximum_time,end_time,movie
0,1,X28+,2003/11/04,486,19:29,19:53,20:06,MovieView archive
1,2,X20+,2001/04/02,9393,21:32,21:51,22:03,MovieView archive
2,3,X17.2+,2003/10/28,486,09:51,11:10,11:24,MovieView archive
3,4,X17+,2005/09/07,808,17:17,17:40,18:03,MovieView archive
4,5,X14.4,2001/04/15,9415,13:19,13:50,13:55,MovieView archive
5,6,X10,2003/10/29,486,20:37,20:49,21:01,MovieView archive
6,7,X9.4,1997/11/06,8100,11:49,11:55,12:01,MovieView archive
7,8,X9.3,2017/09/06,2673,11:53,12:02,12:10,MovieView archive
8,9,X9,2006/12/05,930,10:18,10:35,10:45,MovieView archive
9,10,X8.3,2003/11/02,486,17:03,17:25,17:39,MovieView archive


___
## Step 2 - Tidy the Top 50 Solar Flare Data

In [319]:
solfla_df = solfla_df[["rank", "x_class", "date", "start_time", "maximum_time", "end_time","region"]]
solfla_df.head()

Unnamed: 0,rank,x_class,date,start_time,maximum_time,end_time,region
0,1,X28+,2003/11/04,19:29,19:53,20:06,486
1,2,X20+,2001/04/02,21:32,21:51,22:03,9393
2,3,X17.2+,2003/10/28,09:51,11:10,11:24,486
3,4,X17+,2005/09/07,17:17,17:40,18:03,808
4,5,X14.4,2001/04/15,13:19,13:50,13:55,9415


In [320]:
import datetime

In [321]:

solfla_df.loc[:,"start_time"] = pd.to_datetime(solfla_df.loc[:,"date"] + " " + solfla_df.loc[:,"start_time"])
solfla_df.loc[:,"maximum_time"] = pd.to_datetime(solfla_df.loc[:,"date"] + " " + solfla_df.loc[:,"maximum_time"])
solfla_df.loc[:,"end_time"] = pd.to_datetime(solfla_df.loc[:,"date"] + " " + solfla_df.loc[:,"end_time"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


In [322]:
solfla_df.drop(columns="date",inplace=True) #drop date table
solfla_df.rename(columns={"start_time":"start_datetime", "end_time":"end_datetime","maximum_time":"maximum_datetime"},inplace=True)
solfla_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,rank,x_class,start_datetime,maximum_datetime,end_datetime,region
0,1,X28+,2003-11-04 19:29:00,2003-11-04 19:53:00,2003-11-04 20:06:00,486
1,2,X20+,2001-04-02 21:32:00,2001-04-02 21:51:00,2001-04-02 22:03:00,9393
2,3,X17.2+,2003-10-28 09:51:00,2003-10-28 11:10:00,2003-10-28 11:24:00,486
3,4,X17+,2005-09-07 17:17:00,2005-09-07 17:40:00,2005-09-07 18:03:00,808
4,5,X14.4,2001-04-15 13:19:00,2001-04-15 13:50:00,2001-04-15 13:55:00,9415


In [323]:
def remove_plus(string):
  return re.sub("\+", "", string,1)

def make_float(string): #make all number values in rating into floats to make searching easier later
  rating = string[0]
  string = string[1:]
  new_float = float(string)
  return rating + str(new_float)

  
solfla_df["x_class"] = solfla_df["x_class"].apply(remove_plus)
solfla_df["x_class"] = solfla_df["x_class"].apply(make_float)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solfla_df["x_class"] = solfla_df["x_class"].apply(remove_plus)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solfla_df["x_class"] = solfla_df["x_class"].apply(make_float)


In [324]:
solfla_df.dtypes

rank                         int64
x_class                     object
start_datetime      datetime64[ns]
maximum_datetime    datetime64[ns]
end_datetime        datetime64[ns]
region                       int64
dtype: object

In [325]:
solfla_df

Unnamed: 0,rank,x_class,start_datetime,maximum_datetime,end_datetime,region
0,1,X28.0,2003-11-04 19:29:00,2003-11-04 19:53:00,2003-11-04 20:06:00,486
1,2,X20.0,2001-04-02 21:32:00,2001-04-02 21:51:00,2001-04-02 22:03:00,9393
2,3,X17.2,2003-10-28 09:51:00,2003-10-28 11:10:00,2003-10-28 11:24:00,486
3,4,X17.0,2005-09-07 17:17:00,2005-09-07 17:40:00,2005-09-07 18:03:00,808
4,5,X14.4,2001-04-15 13:19:00,2001-04-15 13:50:00,2001-04-15 13:55:00,9415
5,6,X10.0,2003-10-29 20:37:00,2003-10-29 20:49:00,2003-10-29 21:01:00,486
6,7,X9.4,1997-11-06 11:49:00,1997-11-06 11:55:00,1997-11-06 12:01:00,8100
7,8,X9.3,2017-09-06 11:53:00,2017-09-06 12:02:00,2017-09-06 12:10:00,2673
8,9,X9.0,2006-12-05 10:18:00,2006-12-05 10:35:00,2006-12-05 10:45:00,930
9,10,X8.3,2003-11-02 17:03:00,2003-11-02 17:25:00,2003-11-02 17:39:00,486


___
## Step 3 - Scrape the NASA Data

In [326]:
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r = requests.get("https://cdaw.gsfc.nasa.gov/CME_list/radio/waves_type2.html",timeout=10,headers=headers)
r.status_code

200

In [327]:
text_response = r.content #https://stackoverflow.com/questions/52389692/beautifulsoup-and-prettify-function
soup = BeautifulSoup(text_response,"html.parser")
table = soup.get_text()
split_table = table.splitlines()
split_table = split_table[15:]
split_table = split_table[0:len(split_table)-2] #clean up table by removing top and bottom portion


In [328]:
nasa_df = pd.DataFrame( #create empty DF 
    columns = ["start_date","start_time","end_date","end_time","start_freq","end_freq","location","region","xray_importance","cme_date","cme_time","central_pos_angle","cpa_width","cme_speed","PHTX" ]
    )
for i,v in enumerate(split_table): 
  row_vals = re.findall(r"\S+",v)
  nasa_df.loc[i] = row_vals[0:15] #assign values for each row from list after splitting string into a list var, also drop 16th entry because sometimes it is useless notation

In [329]:
naas_df = nasa_df.reindex(columns=["cme_date","cme_time","central_pos_angle","end_date","end_freq","end_time","location","region","xray_importance","cme_speed","start_date", "start_time", "start_freq","cpa_width","PHTX"])
nasa_df

Unnamed: 0,start_date,start_time,end_date,end_time,start_freq,end_freq,location,region,xray_importance,cme_date,cme_time,central_pos_angle,cpa_width,cme_speed,PHTX
0,1997/04/01,14:00,04/01,14:15,8000,4000,S25E16,8026,M1.3,04/01,15:18,74,79,312,PHTX
1,1997/04/07,14:30,04/07,17:30,11000,1000,S28E19,8027,C6.8,04/07,14:27,Halo,360,878,PHTX
2,1997/05/12,05:15,05/14,16:00,12000,80,N21W08,8038,C1.3,05/12,05:30,Halo,360,464,PHTX
3,1997/05/21,20:20,05/21,22:00,5000,500,N05W12,8040,M1.3,05/21,21:00,263,165,296,PHTX
4,1997/09/23,21:53,09/23,22:16,6000,2000,S29E25,8088,C1.4,09/23,22:02,133,155,712,PHTX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,2017/09/17,11:45,09/17,12:35,16000,900,S08E170,-----,----,09/17,12:00,Halo,360,1385,PHTX
518,2017/10/18,05:48,10/18,12:40,16000,400,S06E123,-----,----,10/18,08:00,85,146,1001,PHTX
519,2019/05/03,23:52,05/04,00:16,13000,2300,N12E82,12740,C1.0,05/03,23:24,90,113,692,PHTX
520,2020/11/29,13:07,11/29,15:23,14000,850,S23E89,-----,M4.4,11/29,13:25,Halo,360,2077,----


In [330]:
nasa_df.drop(columns=["PHTX"],inplace=True) #drop links column

In [331]:
nasa_df

Unnamed: 0,start_date,start_time,end_date,end_time,start_freq,end_freq,location,region,xray_importance,cme_date,cme_time,central_pos_angle,cpa_width,cme_speed
0,1997/04/01,14:00,04/01,14:15,8000,4000,S25E16,8026,M1.3,04/01,15:18,74,79,312
1,1997/04/07,14:30,04/07,17:30,11000,1000,S28E19,8027,C6.8,04/07,14:27,Halo,360,878
2,1997/05/12,05:15,05/14,16:00,12000,80,N21W08,8038,C1.3,05/12,05:30,Halo,360,464
3,1997/05/21,20:20,05/21,22:00,5000,500,N05W12,8040,M1.3,05/21,21:00,263,165,296
4,1997/09/23,21:53,09/23,22:16,6000,2000,S29E25,8088,C1.4,09/23,22:02,133,155,712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,2017/09/17,11:45,09/17,12:35,16000,900,S08E170,-----,----,09/17,12:00,Halo,360,1385
518,2017/10/18,05:48,10/18,12:40,16000,400,S06E123,-----,----,10/18,08:00,85,146,1001
519,2019/05/03,23:52,05/04,00:16,13000,2300,N12E82,12740,C1.0,05/03,23:24,90,113,692
520,2020/11/29,13:07,11/29,15:23,14000,850,S23E89,-----,M4.4,11/29,13:25,Halo,360,2077


___
## Step 4 - Tidy the Nasa Data

**Replace all empty values with NaNs**

In [332]:
nasa_df.region.replace("-----", np.nan,inplace=True)
nasa_df.xray_importance.replace("----", np.nan,inplace=True)
nasa_df.cme_date.replace("--/--",np.nan, inplace=True)
nasa_df.cme_time.replace("--:--",np.nan,inplace=True)
nasa_df.central_pos_angle.replace("----", np.nan,inplace=True)
nasa_df.cpa_width.replace("----", np.nan,inplace=True)
nasa_df.cpa_width.replace("---", np.nan,inplace=True)
nasa_df.cpa_width.replace("360h", "360",inplace=True)
nasa_df.cme_speed.replace("----", np.nan,inplace=True)
nasa_df.start_freq.replace("????",np.nan,inplace=True)
nasa_df.end_freq.replace("????",np.nan,inplace=True)



**Replace all lower case "Back" values with upper case "BACK" in location column**

In [333]:
nasa_df.location.replace("Back","BACK",inplace=True)

**Replace all DSF values with FILA in region column**

In [334]:
nasa_df.region.replace("DSF","FILE",inplace=True)

**Create column to indicate HALO flare, then turn Halo entries into NaN.**<br/>
Halo = 1 <br/>
Non-Halo = 0

In [335]:
column_vals = []
for index,value in nasa_df["central_pos_angle"].iteritems():
  if value == "Halo":
    column_vals.append(True)
  else:
    column_vals.append(False)
nasa_df["is_halo"] = column_vals

In [336]:
nasa_df.central_pos_angle.replace("Halo", np.nan,inplace=True)

**Indicate which cpa_widths are lower bounds, make a new column with the info, then remove the non-numeric characters**

In [337]:
column_vals = []
for index,value in nasa_df["cpa_width"].iteritems():
  if pd.isnull(value) == True:
    column_vals.append(True)
  elif ">" in value:
        column_vals.append(False)
  else:
    column_vals.append(False)
nasa_df["lower_bound"] = column_vals

In [338]:
def remove_sign(str):
  if pd.isnull(str) == True:
    return str
  return re.sub(">","",str)

nasa_df["cpa_width"] = nasa_df["cpa_width"].apply(remove_sign)

**Reformat date columns and drop repetitive data**

In [339]:
nasa_df.replace("24:00","23:59",inplace=True)

In [340]:
for index,value in nasa_df["end_date"].iteritems():
  year = nasa_df.start_date.loc[index][0:4]
  nasa_df["end_date"].loc[index] = pd.to_datetime(year + "/" + nasa_df["end_date"].loc[index] + " " + nasa_df["end_time"].loc[index])

for index,value in nasa_df["cme_date"].iteritems():
  year = nasa_df.start_date.loc[index][0:4]
  if pd.isnull(nasa_df["cme_date"].loc[index]):
    pass
  else:
    nasa_df["cme_date"].loc[index] = pd.to_datetime(year + "/" + nasa_df["cme_date"].loc[index] + " " + nasa_df["cme_time"].loc[index])

nasa_df.loc[:,"start_date"] = pd.to_datetime(nasa_df.loc[:,"start_date"] + " " + nasa_df.loc[:,"start_time"])
nasa_df.loc[:,"end_date"] = pd.to_datetime(nasa_df.loc[:,"end_date"])
nasa_df.loc[:,"cme_date"] = pd.to_datetime(nasa_df.loc[:,"cme_date"])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [341]:
nasa_df.rename(columns={"start_date":"start_datetime", "end_date":"end_datetime","cme_date":"cme_datetime"},inplace=True) #rename columns

In [342]:
nasa_df.drop(columns=["start_time","end_time","cme_time"],inplace=True) #drop unnessecary values

In [343]:
nasa_df.start_freq = nasa_df.start_freq.astype('float') #change all str types to float
nasa_df.end_freq = nasa_df.end_freq.astype('float')
nasa_df.central_pos_angle = nasa_df.central_pos_angle.astype('float')
nasa_df.cme_speed = nasa_df.cme_speed.astype('float')
nasa_df.cpa_width = nasa_df.cpa_width.astype('float')

**Adjust xray_importance column to have 0 after "." value for easier searching later**

In [344]:
nasa_df.dtypes

start_datetime       datetime64[ns]
end_datetime         datetime64[ns]
start_freq                  float64
end_freq                    float64
location                     object
region                       object
xray_importance              object
cme_datetime         datetime64[ns]
central_pos_angle           float64
cpa_width                   float64
cme_speed                   float64
is_halo                        bool
lower_bound                    bool
dtype: object

In [401]:
nasa_df

Unnamed: 0,start_datetime,end_datetime,start_freq,end_freq,location,region,xray_importance,cme_datetime,central_pos_angle,cpa_width,cme_speed,is_halo,lower_bound
0,1997-04-01 14:00:00,1997-04-01 14:15:00,8000.0,4000.0,S25E16,8026,M1.3,1997-04-01 15:18:00,74.0,79.0,312.0,False,False
1,1997-04-07 14:30:00,1997-04-07 17:30:00,11000.0,1000.0,S28E19,8027,C6.8,1997-04-07 14:27:00,,360.0,878.0,True,False
2,1997-05-12 05:15:00,1997-05-14 16:00:00,12000.0,80.0,N21W08,8038,C1.3,1997-05-12 05:30:00,,360.0,464.0,True,False
3,1997-05-21 20:20:00,1997-05-21 22:00:00,5000.0,500.0,N05W12,8040,M1.3,1997-05-21 21:00:00,263.0,165.0,296.0,False,False
4,1997-09-23 21:53:00,1997-09-23 22:16:00,6000.0,2000.0,S29E25,8088,C1.4,1997-09-23 22:02:00,133.0,155.0,712.0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,2017-09-17 11:45:00,2017-09-17 12:35:00,16000.0,900.0,S08E170,,,2017-09-17 12:00:00,,360.0,1385.0,True,False
518,2017-10-18 05:48:00,2017-10-18 12:40:00,16000.0,400.0,S06E123,,,2017-10-18 08:00:00,85.0,146.0,1001.0,False,False
519,2019-05-03 23:52:00,2019-05-04 00:16:00,13000.0,2300.0,N12E82,12740,C1.0,2019-05-03 23:24:00,90.0,113.0,692.0,False,False
520,2020-11-29 13:07:00,2020-11-29 15:23:00,14000.0,850.0,S23E89,,M4.4,2020-11-29 13:25:00,,360.0,2077.0,True,False


# Part 2 Analysis:

### Question 1: Replication

In [349]:
nasa_df.head()

Unnamed: 0,start_datetime,end_datetime,start_freq,end_freq,location,region,xray_importance,cme_datetime,central_pos_angle,cpa_width,cme_speed,is_halo,lower_bound
0,1997-04-01 14:00:00,1997-04-01 14:15:00,8000.0,4000.0,S25E16,8026,M1.3,1997-04-01 15:18:00,74.0,79.0,312.0,False,False
1,1997-04-07 14:30:00,1997-04-07 17:30:00,11000.0,1000.0,S28E19,8027,C6.8,1997-04-07 14:27:00,,360.0,878.0,True,False
2,1997-05-12 05:15:00,1997-05-14 16:00:00,12000.0,80.0,N21W08,8038,C1.3,1997-05-12 05:30:00,,360.0,464.0,True,False
3,1997-05-21 20:20:00,1997-05-21 22:00:00,5000.0,500.0,N05W12,8040,M1.3,1997-05-21 21:00:00,263.0,165.0,296.0,False,False
4,1997-09-23 21:53:00,1997-09-23 22:16:00,6000.0,2000.0,S29E25,8088,C1.4,1997-09-23 22:02:00,133.0,155.0,712.0,False,False


In [350]:
solfla_df.head()

Unnamed: 0,rank,x_class,start_datetime,maximum_datetime,end_datetime,region
0,1,X28.0,2003-11-04 19:29:00,2003-11-04 19:53:00,2003-11-04 20:06:00,486
1,2,X20.0,2001-04-02 21:32:00,2001-04-02 21:51:00,2001-04-02 22:03:00,9393
2,3,X17.2,2003-10-28 09:51:00,2003-10-28 11:10:00,2003-10-28 11:24:00,486
3,4,X17.0,2005-09-07 17:17:00,2005-09-07 17:40:00,2005-09-07 18:03:00,808
4,5,X14.4,2001-04-15 13:19:00,2001-04-15 13:50:00,2001-04-15 13:55:00,9415


**We will first gather the entries for the nasa column that are rated of "X" importance as well as the start date for each so that we can compare them to the solarflarelive data.**

In [397]:
flare_lst = []

for index,value in nasa_df.iterrows():
    if pd.isnull(value["xray_importance"]) == False and "X" in value["xray_importance"]:
        flare_lst.append([float(value["xray_importance"][1:]),value["start_datetime"].date()])


1997-11-04
1997-11-06
1997-11-27
1998-04-23
1998-04-27
1998-05-02
1998-05-06
1999-08-28
1999-10-14
2000-02-05
2000-06-06
2000-07-11
2000-07-14
2000-11-24
2000-11-24
2000-11-24
2000-11-25
2000-11-26
2001-03-29
2001-04-02
2001-04-02
2001-04-03
2001-04-06
2001-04-10
2001-04-12
2001-04-15
2001-08-25
2001-09-24
2001-10-19
2001-10-19
2001-10-25
2001-11-04
2001-12-28
2002-04-21
2002-07-18
2002-07-20
2002-07-23
2002-08-03
2002-08-24
2003-03-18
2003-05-27
2003-05-28
2003-05-29
2003-06-16
2003-10-26
2003-10-26
2003-10-28
2003-10-29
2003-11-02
2003-11-03
2003-11-03
2003-11-04
2004-11-07
2004-11-10
2005-01-01
2005-01-15
2005-01-17
2005-01-17
2005-01-19
2005-01-20
2005-07-14
2005-07-30
2005-09-07
2005-09-09
2005-09-10
2005-09-13
2006-12-05
2006-12-06
2006-12-13
2006-12-14
2011-02-15
2011-08-09
2011-09-06
2011-09-22
2012-01-27
2012-03-05
2012-03-07
2012-07-06
2012-07-12
2013-05-13
2013-05-13
2013-05-14
2013-05-15
2013-10-25
2013-11-19
2014-01-07
2014-02-25
2014-06-10
2014-09-10
2015-05-05
2017-09-06

In [398]:
flare_lst.sort(reverse=True) #all X value flares sorted from largest to smallest with dates

In [402]:
replica_df = pd.DataFrame(
    columns = ['start_datetime', 'end_datetime', 'start_freq', 'end_freq', 'location',
       'region', 'xray_importance', 'cme_datetime', 'central_pos_angle',
       'cpa_width', 'cme_speed', 'is_halo', 'lower_bound']
)

In [413]:
for index, value in enumerate(flare_lst):
    rating = value[0]
    date = value[1]
    if (nasa_df["xray_importance"][1:] == rating) & (nasa_df["start_datetime"] == date):
        print("Hello")
        # entry = (nasa_df[nasa_df["xray_importance"][1:] == rating]) & (nasa_df[nasa_df["start_datetime"].date() == date])
        # print(entry)
        # replica_df.append(entry)

    

In [428]:
for index, value in solfla_df.iterrows():
    for rating, date in flare_lst:
        print(float(value["x_class"][1:]))
        print(rating)
        # if (float(value["x_class"][1:] == rating)) and (value["start_datetime"].date() == date):
        #     replica_df.append(value)
        if (float(value["x_class"][1:] == rating)):
            print("match")

28.0
28.0
28.0
20.0
28.0
17.0
28.0
14.0
28.0
10.0
28.0
9.4
28.0
9.3
28.0
9.0
28.0
8.3
28.0
8.3
28.0
7.1
28.0
6.9
28.0
6.5
28.0
6.2
28.0
5.7
28.0
5.6
28.0
5.4
28.0
5.3
28.0
4.9
28.0
4.8
28.0
4.0
28.0
3.9
28.0
3.8
28.0
3.6
28.0
3.4
28.0
3.4
28.0
3.3
28.0
3.2
28.0
3.1
28.0
2.8
28.0
2.7
28.0
2.7
28.0
2.7
28.0
2.6
28.0
2.6
28.0
2.6
28.0
2.5
28.0
2.3
28.0
2.3
28.0
2.3
28.0
2.2
28.0
2.1
28.0
2.1
28.0
2.1
28.0
2.1
28.0
2.0
28.0
2.0
28.0
2.0
28.0
2.0
28.0
1.9
28.0
1.8
28.0
1.8
28.0
1.8
28.0
1.7
28.0
1.7
28.0
1.7
28.0
1.7
28.0
1.7
28.0
1.6
28.0
1.6
28.0
1.6
28.0
1.5
28.0
1.5
28.0
1.5
28.0
1.5
28.0
1.5
28.0
1.4
28.0
1.4
28.0
1.3
28.0
1.3
28.0
1.3
28.0
1.3
28.0
1.3
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.2
28.0
1.1
28.0
1.1
28.0
1.1
28.0
1.1
28.0
1.1
28.0
1.0
28.0
1.0
28.0
1.0
28.0
1.0
28.0
1.0
20.0
28.0
20.0
20.0
20.0
17.0
20.0
14.0
20.0
10.0
20.0
9.4
20.0
9.3
20.0
9.0
20.0
8.3
20.0
8.3
20.0
7.1
20.0
6.9
20.0
6.5
20.0
6.2
20.0
5.7
20.0
5.6
20.0
5.4
20.0
5.3


In [421]:
replica_df

Unnamed: 0,start_datetime,end_datetime,start_freq,end_freq,location,region,xray_importance,cme_datetime,central_pos_angle,cpa_width,cme_speed,is_halo,lower_bound
