### Importing dependencies

In [4]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

### Reading raw data

In [5]:
df = pd.read_excel("resources/UCS-Satellite-Database-1-1-2023.xlsx")
df.head()

Unnamed: 0,"Name of Satellite, Alternate Names",Current Official Name of Satellite,Country/Org of UN Registry,Country of Operator/Owner,Operator/Owner,Users,Purpose,Detailed Purpose,Class of Orbit,Type of Orbit,...,Unnamed: 58,Unnamed: 59,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67
0,1HOPSAT-TD (1st-generation High Optical Perfor...,1HOPSAT-TD,NR,USA,Hera Systems,Commercial,Earth Observation,Infrared Imaging,LEO,Non-Polar Inclined,...,,,,,,,,,,
1,Aalto-1,Aalto-1,Finland,Finland,Aalto University,Civil,Technology Development,,LEO,Sun-Synchronous,...,,,,,,,,,,
2,AAt-4,AAt-4,Denmark,Denmark,University of Aalborg,Civil,Earth Observation,Automatic Identification System (AIS),LEO,Sun-Synchronous,...,,,,,,,,,,
3,"ABS-2 (Koreasat-8, ST-3)",ABS-2,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,,GEO,,...,,,,,,,,,,
4,ABS-2A,ABS-2A,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,,GEO,,...,,,,,,,,,,


In [6]:
# dropping unnecessary columns
dropped_columns = [col for col in df.columns if ('Unnamed' in col or 'Source' in col or 'Comments' in col)]
df_cleaned = df.drop(columns=dropped_columns)
df_cleaned.head()

Unnamed: 0,"Name of Satellite, Alternate Names",Current Official Name of Satellite,Country/Org of UN Registry,Country of Operator/Owner,Operator/Owner,Users,Purpose,Detailed Purpose,Class of Orbit,Type of Orbit,...,Dry Mass (kg.),Power (watts),Date of Launch,Expected Lifetime (yrs.),Contractor,Country of Contractor,Launch Site,Launch Vehicle,COSPAR Number,NORAD Number
0,1HOPSAT-TD (1st-generation High Optical Perfor...,1HOPSAT-TD,NR,USA,Hera Systems,Commercial,Earth Observation,Infrared Imaging,LEO,Non-Polar Inclined,...,,,2019-12-11 00:00:00,0.5,Hera Systems,USA,Satish Dhawan Space Centre,PSLV,2019-089H,44859
1,Aalto-1,Aalto-1,Finland,Finland,Aalto University,Civil,Technology Development,,LEO,Sun-Synchronous,...,,4.5,2017-06-23 00:00:00,2.0,Aalto University,Finland,Satish Dhawan Space Centre,PSLV,2017-036L,42775
2,AAt-4,AAt-4,Denmark,Denmark,University of Aalborg,Civil,Earth Observation,Automatic Identification System (AIS),LEO,Sun-Synchronous,...,,,2016-04-25 00:00:00,,University of Aalborg,Denmark,Guiana Space Center,Soyuz-2.1a,2016-025E,41460
3,"ABS-2 (Koreasat-8, ST-3)",ABS-2,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,,GEO,,...,,16000.0,2014-02-06 00:00:00,15.0,Space Systems/Loral,USA,Guiana Space Center,Ariane 5 ECA,2014-006A,39508
4,ABS-2A,ABS-2A,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,,GEO,,...,,,2016-06-15 00:00:00,15.0,Boeing Satellite Systems,USA,Cape Canaveral,Falcon 9,2016-038A,41588


In [7]:
# check for null values
df_cleaned.isnull().sum()

Name of Satellite, Alternate Names       0
Current Official Name of Satellite       0
Country/Org of UN Registry               0
Country of Operator/Owner                0
Operator/Owner                           0
Users                                    0
Purpose                                  0
Detailed Purpose                      5551
Class of Orbit                           0
Type of Orbit                          641
Longitude of GEO (degrees)               2
Perigee (km)                             0
Apogee (km)                              0
Eccentricity                             0
Inclination (degrees)                    0
Period (minutes)                        44
Launch Mass (kg.)                      243
Dry Mass (kg.)                        6274
Power (watts)                         6137
Date of Launch                           0
Expected Lifetime (yrs.)              1914
Contractor                               0
Country of Contractor                    0
Launch Site

In [8]:
# dropping columns with many null values
df_cleaned.drop(columns=['Power (watts)', 'Dry Mass (kg.)', 'Detailed Purpose'], inplace=True)
df_cleaned.isnull().sum()

Name of Satellite, Alternate Names       0
Current Official Name of Satellite       0
Country/Org of UN Registry               0
Country of Operator/Owner                0
Operator/Owner                           0
Users                                    0
Purpose                                  0
Class of Orbit                           0
Type of Orbit                          641
Longitude of GEO (degrees)               2
Perigee (km)                             0
Apogee (km)                              0
Eccentricity                             0
Inclination (degrees)                    0
Period (minutes)                        44
Launch Mass (kg.)                      243
Date of Launch                           0
Expected Lifetime (yrs.)              1914
Contractor                               0
Country of Contractor                    0
Launch Site                              0
Launch Vehicle                           0
COSPAR Number                            0
NORAD Numbe

In [9]:
# before handling the NaN values of Type of Orbit column, let's see how many Classes of Orbit there Are
df_cleaned["Class of Orbit"].unique()


array(['LEO', 'GEO', 'Elliptical', 'MEO', 'LEo'], dtype=object)

### The four Classes of Orbits 
- GEO (Geostationary Earth Orbits)
- MEO (Medium Earth Orbits)
- LEO (Low Earth Orbits)
- HEO (Highly Elliptical Orbits)

![orbits](resources/images/Elliptical-Orbits.png)

In [10]:
# the classes LEO and LEo should be the same, it seems like a typo, let's see how many LEo Classes are there 
# and replace them with LEO

occurrences = (df_cleaned["Class of Orbit"]=="LEo").sum() # only one occurrence
print(f'LEo occurs {occurrences} times')
df_cleaned.loc[df_cleaned["Class of Orbit"]=="LEo", "Class of Orbit"] = "LEO"
# changing the Elliptical Class of Orbit name to HEO (Highly Elliptical Orbit)
df_cleaned.loc[df_cleaned["Class of Orbit"]=="Elliptical", "Class of Orbit"] = "HEO"
df_cleaned["Class of Orbit"].unique() # now we have four Classes of Orbit

LEo occurs 1 times


array(['LEO', 'GEO', 'HEO', 'MEO'], dtype=object)

In [11]:
# handling missing values for type of orbit
# type of orbit of class of orbit GEO
Type_orbit_class_GEO = df_cleaned.loc[df_cleaned["Class of Orbit"]=="GEO", "Type of Orbit"].unique() 
print(Type_orbit_class_GEO) # all NaN values
# Class of Orbit GEO has NaN Type of Orbit, the corresponding Type of Orbit for the GEO class is Equatorial
df_cleaned.loc[df_cleaned["Class of Orbit"]=="GEO", "Type of Orbit"] = "Equatorial"
df_cleaned["Type of Orbit"].isnull().sum() # 61 remaining NaN values

[nan]


61

In [12]:
# for the remaining null values, I will opt for merging the Class and the Type of orbit onto one column named Orbit
df_cleaned_copie = df_cleaned
def combine_class_type(row):
    if not pd.isnull(row["Type of Orbit"]):
        return row["Class of Orbit"] + " - " + row["Type of Orbit"]
    else:
        return row["Class of Orbit"]
df_cleaned_copie["Orbit"] = df_cleaned_copie.apply(combine_class_type, axis=1)
df_cleaned_copie.drop(columns=["Class of Orbit", "Type of Orbit"], inplace=True)
df_cleaned_copie.head()

Unnamed: 0,"Name of Satellite, Alternate Names",Current Official Name of Satellite,Country/Org of UN Registry,Country of Operator/Owner,Operator/Owner,Users,Purpose,Longitude of GEO (degrees),Perigee (km),Apogee (km),...,Launch Mass (kg.),Date of Launch,Expected Lifetime (yrs.),Contractor,Country of Contractor,Launch Site,Launch Vehicle,COSPAR Number,NORAD Number,Orbit
0,1HOPSAT-TD (1st-generation High Optical Perfor...,1HOPSAT-TD,NR,USA,Hera Systems,Commercial,Earth Observation,0.0,566,576,...,22.0,2019-12-11 00:00:00,0.5,Hera Systems,USA,Satish Dhawan Space Centre,PSLV,2019-089H,44859,LEO - Non-Polar Inclined
1,Aalto-1,Aalto-1,Finland,Finland,Aalto University,Civil,Technology Development,0.0,497,517,...,4.5,2017-06-23 00:00:00,2.0,Aalto University,Finland,Satish Dhawan Space Centre,PSLV,2017-036L,42775,LEO - Sun-Synchronous
2,AAt-4,AAt-4,Denmark,Denmark,University of Aalborg,Civil,Earth Observation,0.0,442,687,...,1.0,2016-04-25 00:00:00,,University of Aalborg,Denmark,Guiana Space Center,Soyuz-2.1a,2016-025E,41460,LEO - Sun-Synchronous
3,"ABS-2 (Koreasat-8, ST-3)",ABS-2,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,75.0,35778,35793,...,6330.0,2014-02-06 00:00:00,15.0,Space Systems/Loral,USA,Guiana Space Center,Ariane 5 ECA,2014-006A,39508,GEO - Equatorial
4,ABS-2A,ABS-2A,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,-75.0,35700,35700,...,1800.0,2016-06-15 00:00:00,15.0,Boeing Satellite Systems,USA,Cape Canaveral,Falcon 9,2016-038A,41588,GEO - Equatorial


In [13]:
df_cleaned_copie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6718 entries, 0 to 6717
Data columns (total 23 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Name of Satellite, Alternate Names  6718 non-null   object 
 1   Current Official Name of Satellite  6718 non-null   object 
 2   Country/Org of UN Registry          6718 non-null   object 
 3   Country of Operator/Owner           6718 non-null   object 
 4   Operator/Owner                      6718 non-null   object 
 5   Users                               6718 non-null   object 
 6   Purpose                             6718 non-null   object 
 7   Longitude of GEO (degrees)          6716 non-null   float64
 8   Perigee (km)                        6718 non-null   int64  
 9   Apogee (km)                         6718 non-null   int64  
 10  Eccentricity                        6718 non-null   float64
 11  Inclination (degrees)               6718 no

In [14]:
# null values for period
# let's first get the list of NORAD Number for the satellites with missing periods
NORAD_missing_periods = list(df_cleaned_copie.loc[df_cleaned_copie["Period (minutes)"].isna(), "NORAD Number"])
# we will try to parse the following website for these satellites periods
base_url = "https://www.n2yo.com/satellite/?s=" # we will need to add NORAD number of each satellite to this url
found_periods = {}
for num in NORAD_missing_periods:
    url = base_url + str(num) 
    response = requests.get(url)
    html_content=response.text
    soup = BeautifulSoup(html_content, 'html.parser')
    sat_info = soup.find('div', id='satinfo')
    if sat_info:
        b_elements = sat_info.find_all('b')

        for b_element in b_elements:
            if b_element.get_text() == 'Period':
                next_sibling = b_element.nextSibling  # Get the next sibling node
                if next_sibling and hasattr(next_sibling, 'strip'):
                    period_text = next_sibling.strip()
                    period_text = period_text.replace(',', '')
                    period = float(period_text.split(':')[1].strip()[:-8])
                    print(f"Satellite: {num} has Period: {period}")
                    found_periods[num] = period
                    break
                else:
                    print("next sibling problem")
    else:
        print("No period was found!")
len(found_periods) # found 41 satellites' periods succefully 



Satellite: 54033 has Period: 1436.1
Satellite: 49464 has Period: 98.2
Satellite: 49465 has Period: 98.2
Satellite: 49466 has Period: 98.2
Satellite: 53586 has Period: 96.2
Satellite: 53876 has Period: 96.2
Satellite: 52935 has Period: 95.9
Satellite: 50001 has Period: 1436.1
Satellite: 50002 has Period: 1436.1
Satellite: 49954 has Period: 96.3
Satellite: 52736 has Period: 93.6
Satellite: 52769 has Period: 94.2
Satellite: 49052 has Period: 96.5
Satellite: 52937 has Period: 95.7
Satellite: 49055 has Period: 1436.1
Satellite: 51102 has Period: 103.4
Satellite: 50212 has Period: 1436.1
Satellite: 48846 has Period: 96.4
Satellite: 48847 has Period: 95.7
Satellite: 48848 has Period: 96.4
Satellite: 52889 has Period: 95.1
Satellite: 52890 has Period: 95.1
Satellite: 52891 has Period: 95.2
Satellite: 52892 has Period: 95.1
Satellite: 52942 has Period: 1556.9
Satellite: 52907 has Period: 94.5
Satellite: 52908 has Period: 94.5
Satellite: 52909 has Period: 93.7
Satellite: 53316 has Period: 94.5
S

41

In [15]:
# let's see which satellites we didn't find the periods for
NORAD_still_missing_periods = list(set(NORAD_missing_periods) - set(found_periods.keys()))
df_cleaned_copie.loc[df_cleaned_copie["NORAD Number"].isin(NORAD_still_missing_periods), ["Current Official Name of Satellite", "COSPAR Number"]]

# I couldn't find the periods for two of the three remaining satellites
# dropping two and setting the value of the found period 

df_cleaned_copie = df_cleaned_copie[~df_cleaned_copie["NORAD Number"].isin([52_901,49_392])]
found_periods[53_102]=97.3 # satellites USA 334 (looked for its period manually)
                           # the two remaining satellites are no longer in orbit

# replacing the NaN values in Period column with the values in found_periods
for index, row in df_cleaned_copie.iterrows():
    norad_number = row["NORAD Number"]
    if norad_number in found_periods:
        df_cleaned_copie.at[index, "Period (minutes)"] = found_periods[norad_number]
df_cleaned_copie.isnull().sum()



Name of Satellite, Alternate Names       0
Current Official Name of Satellite       0
Country/Org of UN Registry               0
Country of Operator/Owner                0
Operator/Owner                           0
Users                                    0
Purpose                                  0
Longitude of GEO (degrees)               2
Perigee (km)                             0
Apogee (km)                              0
Eccentricity                             0
Inclination (degrees)                    0
Period (minutes)                         0
Launch Mass (kg.)                      243
Date of Launch                           0
Expected Lifetime (yrs.)              1912
Contractor                               0
Country of Contractor                    0
Launch Site                              0
Launch Vehicle                           0
COSPAR Number                            0
NORAD Number                             0
Orbit                                    0
dtype: int6

the longitude of GEO (degrees) column:
For satellites that are in geosynchronous orbits GEO, this is the earth longitude of the point over which 
the satellite sits, in degrees. A “+” indicates longitude east of 0º (Greenwich) and a “-“ indicates 
longitude west.The column is blank for satellites in non-GEO orbits.

In [16]:
# looking through the satellites that are not in GEO yet there longitude of GEO is not equal to 0.0 degrees
# one satellite was found Angosat, upon researching it, this satellite is in GEO and not LEO as originally
# stated in the database, let's fix that
df_cleaned_copie.loc[(~df_cleaned_copie["Orbit"].str.contains("GEO") & df_cleaned_copie["Longitude of GEO (degrees)"] != 0.0), "Orbit"] = "GEO"

In [17]:
# finding the satellites where the longitude of GEO (degrees) is null (NaN)
df_cleaned_copie[df_cleaned_copie["Longitude of GEO (degrees)"].isna()]
# found two satellites with the corresponding NORAD Numbers 53_100 and 52_942
df_cleaned_copie.loc[df_cleaned_copie["NORAD Number"]==53_100, "Longitude of GEO (degrees)"] = 10.60
df_cleaned_copie.loc[df_cleaned_copie["NORAD Number"]==52_942, "Longitude of GEO (degrees)"] = -80.00

df_cleaned_copie.isnull().sum()

Name of Satellite, Alternate Names       0
Current Official Name of Satellite       0
Country/Org of UN Registry               0
Country of Operator/Owner                0
Operator/Owner                           0
Users                                    0
Purpose                                  0
Longitude of GEO (degrees)               0
Perigee (km)                             0
Apogee (km)                              0
Eccentricity                             0
Inclination (degrees)                    0
Period (minutes)                         0
Launch Mass (kg.)                      243
Date of Launch                           0
Expected Lifetime (yrs.)              1912
Contractor                               0
Country of Contractor                    0
Launch Site                              0
Launch Vehicle                           0
COSPAR Number                            0
NORAD Number                             0
Orbit                                    0
dtype: int6

Now let's reformat our 'Date of Launch' column

In [18]:
import datetime
for i in df_cleaned_copie["Date of Launch"].unique():
    if not isinstance(i, datetime.datetime):
        print(i)
# only one value is wrong, so let's fix that 
df_cleaned_copie.loc[df_cleaned_copie["Date of Launch"]=="11/29/018", "Date of Launch"] = pd.to_datetime("2018-11-29 00:00:00").to_pydatetime()


11/29/018


## Creating the Desired Dataframe

After the extensive cleaning process, we are now ready to construct a new dataframe containing only the columns that are essential for our analysis. The absence of data in the **launch masses** and **expected life times** columns will not pose any hindrance, as these variables will not be included in any of our future analyses.

**Note**:  
For the missing "expected life times," we have plans to develop a predictive model that will estimate the expected lifespan of a satellite based on a set of criteria specific to each satellite. This endeavor will be addressed at a later stage of our project.
 
### essential columns:
We will classify our columns into two categories: **Satellite Details** and **Orbital Paramters**
#### Satellite Details
| Name      | Description |
| ----------- | ----------- |
| **Current Official Name of Satellite** | the current official name for the satellite.|
| **Country/Organization of UN Registry** | This information shows the country responsible for satellite registration in the United Nations, with "NR" meaning unregistered, "NR (xx/xx)" for recently launched unregistered satellites, and it also refers to the launching state according to the Convention on Objects Launched into Outer Space, reflecting the complexities of commercial satellite ownership and control.|
| **Country of Operator/Owner**|This information specifies the home country associated with the operator or owner listed in column D for a satellite, indicating the country of operation or ownership, with exceptions for projects involving the European Space Agency (ESA), which are designated as "ESA" due to their joint efforts by its 15 member states.|
| **Operator/Owner**|The satellite’s current operational controller. The operator is not necessarily the satellite’s owner, satellites may be leased, for example.|
|**Users**|The affiliation of the primary users of the satellite is described with one or more of the keywords: civil (academic, amateur), commercial, government (meteorological, scientific, etc.), military. Satellites can be multi-use, hosting, for example, dedicated transponders for both commercial and military applications.|
|**Purpose**|The discipline in which the satellite is used in broad categories. The purposes listed are those self-reported by the satellite’s operator. A slash between terms indicates the satellite is used for multiple purposes.|
|**Date of Launch**| The date when the satellite was launched.|
|**Contractor**|The prime contractor for the satellite’s construction. The construction of satellites generally involves a number of subcontractors as well. Frequent corporate mergers mean that the name listed as the prime contractor may not be the name of that corporation today. In creating the database, we listed what was shown on the company or agency’s website at the time the database was originally constructed. (These will not necessarily be updated with each new version of the database).|
|**Country of Contractor**|The home nation of the corporation, institution, or governmental agency that was prime contractor for the construction of the satellite.|
|**Launch Site**|The name and/or location of launch facility.|
|**Launch Vehicle**|The name and model of the launch vehicle used to lift the satellite into orbit. The launch is often contracted separately from the construction of the satellite, either by the prime contractor or the owner of the satellite.|

#### satellite ids

|Name|Description|
|----|-----------|
|**COSPAR Number**|The COSPAR number is the international designation assigned by the Committee on Space Research (COSPAR) to each object launched into space. Names of satellites often change, but this number remains constant. The number reflects the year of the launch and sequence of launch within that year. For example, a COSPAR number of 1998-063B would indicate that the satellite was launched in 1998, and that it was on the 63rd successful launch of that year. The “B” indicates that the given satellite was the second object catalogued from that launch.|
|**NORAD Number**|The NORAD number is the five-digit number assigned by the North American Aerospace Defense Command (NORAD) for each satellite in their catalogue. The number is assigned when an object is first observed, and remains with the object throughout its existence. |

#### Orbital Parameters


|Name|Description|
|----|-----------|
|**Longitude of GEO (degrees)**|For satellites that are in geosynchronous orbits, this is the earth longitude of the point over which the satellite sits, in degrees. A “+” indicates longitude east of 0º (Greenwich) and a “-“ indicates longitude west.The column is blank for satellites in non-GEO orbits. |
|**Perigee (km)**|The altitude above the Earth’s surface of the satellite’s perigee, which is the point of the orbit closest to the Earth’s center of mass, given in kilometers. |
|**Apogee (km)**|The altitude above the Earth’s surface of the satellite’s apogee, which is the point of the orbit farthest from the Earth’s center of mass, given in kilometers. |
|**Eccentricity**|The eccentricity, $\epsilon$ , of a satellite’s orbit describes how strongly the orbit deviates from a circle. It is calculated with the following relation: $$\epsilon = \frac{h_a-h_p}{h_a+h_p+2R_e}$$ where: $h_a$ altitude of the satellite above the earth at apogee, $h_p$ is the altitude at perigee and $R_e$ is the earth’s radius (we use the approximate value of the mean earth radius, $Re = 6370 km$)|
|**Inclination (degrees)**|The angle between the orbital plane of the satellite and equatorial plane of the Earth, measured in degrees.
|**Period (minutes)**|The time required for the satellite to complete one full orbit of the Earth, given in minutes.|
|**Launch Mass (kg.)**|The mass of the satellite at the time of launch, including fuel, given in kilograms.|
|**Expected Lifetime (yrs.)**|The planned operational lifetime of the satellite, given in years. This figure is reported by the satellite’s operator and may be based on the expected failure rate for the hardware and software of the satellite, the fuel capacity of the satellite and the expected requirements for maneuvering and stationkeeping (many satellites run out of fuel long before their hardware and software wear out), the planned budget for operating the satellite, and the expected availability of improved future generation satellites. This figure can be misleading, especially in terms of scientific satellites. For example, the Akebono satellite, launched in 1989 with a design life of one year, is still functioning in 2009.|
|**Orbit**| for detailed information about different classes visit this [link](https://s3.amazonaws.com/ucs-documents/nuclear-weapons/sat-database/4-11-17-update/User+Guide+1-1-17+wAppendix.pdf)|

In [19]:
# dataframe for satellite details
satellite_details = ["NORAD Number",
                     "COSPAR Number",
                     "Current Official Name of Satellite",
                     "Country/Org of UN Registry",
                     "Country of Operator/Owner",
                     "Operator/Owner",
                     "Users",
                     "Purpose",
                     "Date of Launch",
                     "Contractor",
                     "Country of Contractor",
                     "Launch Site",
                     "Launch Vehicle"]
df_satellite_details = df_cleaned_copie[satellite_details]
#df_satellite_details['Date of Launch'] = pd.to_datetime(df_satellite_details['Date of Launch'])
df_satellite_details.to_csv('resources/satellite_details.csv', index=False)
df_satellite_details.head()

Unnamed: 0,NORAD Number,COSPAR Number,Current Official Name of Satellite,Country/Org of UN Registry,Country of Operator/Owner,Operator/Owner,Users,Purpose,Date of Launch,Contractor,Country of Contractor,Launch Site,Launch Vehicle
0,44859,2019-089H,1HOPSAT-TD,NR,USA,Hera Systems,Commercial,Earth Observation,2019-12-11 00:00:00,Hera Systems,USA,Satish Dhawan Space Centre,PSLV
1,42775,2017-036L,Aalto-1,Finland,Finland,Aalto University,Civil,Technology Development,2017-06-23 00:00:00,Aalto University,Finland,Satish Dhawan Space Centre,PSLV
2,41460,2016-025E,AAt-4,Denmark,Denmark,University of Aalborg,Civil,Earth Observation,2016-04-25 00:00:00,University of Aalborg,Denmark,Guiana Space Center,Soyuz-2.1a
3,39508,2014-006A,ABS-2,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,2014-02-06 00:00:00,Space Systems/Loral,USA,Guiana Space Center,Ariane 5 ECA
4,41588,2016-038A,ABS-2A,NR,Multinational,Asia Broadcast Satellite Ltd.,Commercial,Communications,2016-06-15 00:00:00,Boeing Satellite Systems,USA,Cape Canaveral,Falcon 9


In [20]:
# dataframe for satellite orbital parameters
orbital_parameters =["NORAD Number", 
                     "COSPAR Number",
                     "Orbit",
                     "Longitude of GEO (degrees)",
                     "Perigee (km)",
                     "Apogee (km)",
                     "Eccentricity",
                     "Inclination (degrees)",
                     "Period (minutes)",
                     "Launch Mass (kg.)",
                     "Expected Lifetime (yrs.)",
                     ]
df_orbital_parameters = df_cleaned_copie[orbital_parameters]
df_orbital_parameters.head()

Unnamed: 0,NORAD Number,COSPAR Number,Orbit,Longitude of GEO (degrees),Perigee (km),Apogee (km),Eccentricity,Inclination (degrees),Period (minutes),Launch Mass (kg.),Expected Lifetime (yrs.)
0,44859,2019-089H,LEO - Non-Polar Inclined,0.0,566,576,0.00151,36.9,96.08,22.0,0.5
1,42775,2017-036L,LEO - Sun-Synchronous,0.0,497,517,0.00145,97.45,94.7,4.5,2.0
2,41460,2016-025E,LEO - Sun-Synchronous,0.0,442,687,0.00151,98.2,95.9,1.0,
3,39508,2014-006A,GEO - Equatorial,75.0,35778,35793,0.000178,0.08,1436.03,6330.0,15.0
4,41588,2016-038A,GEO - Equatorial,-75.0,35700,35700,0.0,0.0,1436.1,1800.0,15.0


### Checking the eccentricity

In [23]:
print(f"maximum eccentricity {df_orbital_parameters['Eccentricity'].max()}")
print(f"minimum eccentricity {df_orbital_parameters['Eccentricity'].min()}")

maximum eccentricity 575.0
minimum eccentricity -0.033428700496913114


the maximum of eccentricity should be 1 and its minimum 0, it varies between 0 and 1 
we will fix that by recalculating the eccentricity for all the satellites using
the following equation: 
$$\epsilon = \frac{h_a - h_p}{h_a+h_p+R_e}$$

but before doing that, let's handle the negative values

In [27]:
# let's find the apogee and perigee of the satellites where the eccentricity is negative
negative_eccentricities = df_orbital_parameters.loc[df_orbital_parameters["Eccentricity"]<0, ["Apogee (km)", "Perigee (km)", "NORAD Number"]]
negative_eccentricities

Unnamed: 0,Apogee (km),Perigee (km),NORAD Number
6634,49,493,49390


it seems like there was a typo in entering the Apogee and Perigee for this satellite, I did some research using this satellite's NORAD Number and found the right Apogee and Perigee it, the values were rounded to the nearest integer

In [42]:
df_orbital_parameters.loc[df_orbital_parameters["Eccentricity"]<0, "Apogee (km)"] = 508
df_orbital_parameters.loc[df_orbital_parameters["Eccentricity"]<0, "Perigee (km)"] = 503

Now for the values that are greater than 1, it seems like there was a miscalculation in the eccentricity

In [45]:
df_orbital_parameters.loc[df_orbital_parameters["Eccentricity"]>1]

Unnamed: 0,NORAD Number,COSPAR Number,Orbit,Longitude of GEO (degrees),Perigee (km),Apogee (km),Eccentricity,Inclination (degrees),Period (minutes),Launch Mass (kg.),Expected Lifetime (yrs.)
51,52160,2022-033D,LEO - Sun-Synchronous,0.0,499,511,511.0,97.4,95.0,1.0,
2384,51096,2022-003C,LEO - Non-Polar Inclined,0.0,492,495,499.0,45.0,94.0,4.0,
6259,53815,2022-113A,LEO - Sun-Synchronous,0.0,558,575,575.0,97.6,94.6,150.0,
6661,51950,2022-023E,LEO - Non-Polar Inclined,0.0,480,500,500.0,63.5,107.1,190.0,
6662,51951,2022-023F,LEO - Non-Polar Inclined,0.0,480,501,500.0,63.5,107.1,190.0,


let's recalculate the eccentricty column

In [51]:
h_a = df_orbital_parameters['Apogee (km)']
h_p = df_orbital_parameters['Perigee (km)']
R_earth = 6370 # eath radius in km
df_orbital_parameters.loc[:, "Eccentricity"] = (h_a-h_p)/(h_a+h_p+R_earth)
df_cleaned_copie.loc[:, "Eccentricity"] = df_orbital_parameters["Eccentricity"]
print("maximum eccentricity: ", df_orbital_parameters["Eccentricity"].max())
print("minimum eccentricity: ", df_orbital_parameters["Eccentricity"].min())

maximum eccentricity:  0.9751904437264725
minimum eccentricity:  0.0


In [52]:
df_orbital_parameters.to_csv('resources/satellite_orbital_parameters.csv', index=False)
df_orbital_parameters.head()

Unnamed: 0,NORAD Number,COSPAR Number,Orbit,Longitude of GEO (degrees),Perigee (km),Apogee (km),Eccentricity,Inclination (degrees),Period (minutes),Launch Mass (kg.),Expected Lifetime (yrs.)
0,44859,2019-089H,LEO - Non-Polar Inclined,0.0,566,576,0.001331,36.9,96.08,22.0,0.5
1,42775,2017-036L,LEO - Sun-Synchronous,0.0,497,517,0.002709,97.45,94.7,4.5,2.0
2,41460,2016-025E,LEO - Sun-Synchronous,0.0,442,687,0.032671,98.2,95.9,1.0,
3,39508,2014-006A,GEO - Equatorial,75.0,35778,35793,0.000192,0.08,1436.03,6330.0,15.0
4,41588,2016-038A,GEO - Equatorial,-75.0,35700,35700,0.0,0.0,1436.1,1800.0,15.0


In [54]:
# saving the whole dataframe as csv
df_cleaned_copie.to_csv('resources/satellite_data.csv', index=False)