In [1]:
import numpy as np # 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
satellite_df=pd.read_csv('./UCSDB.csv',index_col=None, thousands=',')
country_simple=pd.read_csv('./country_codes_simple_cleaned.csv', index_col=None, thousands=',')

In [4]:
satellite_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7560 entries, 0 to 7559
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   satellite_name            7560 non-null   object 
 1   operator_country          7560 non-null   object 
 2   owner                     7560 non-null   object 
 3   users                     7560 non-null   object 
 4   purpose                   7560 non-null   object 
 5   class_of_orbit            7560 non-null   object 
 6   type_of_orbit             6909 non-null   object 
 7   longitude_of_geo_degrees  7557 non-null   float64
 8   perigee_km                7553 non-null   float64
 9   apogee_km                 7553 non-null   float64
 10  eccentricity              7549 non-null   float64
 11  inclination_degrees       7556 non-null   float64
 12  period_minutes            7504 non-null   float64
 13  launch_mass_kg            7315 non-null   float64
 14  date_of_

In [6]:
country_simple.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249 entries, 0 to 248
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   numeric_code  249 non-null    int64 
 1   country_name  249 non-null    object
 2   alpha3_code   249 non-null    object
dtypes: int64(1), object(2)
memory usage: 6.0+ KB


In [7]:
# Convert country names in country_codes_df to a set for fast lookup
valid_countries = set(country_simple['country_name'])

# Check launch_country presence
satellite_df['launch_country_in_codes'] = satellite_df['launch_country'].apply(lambda x: x in valid_countries)

# Check operator_country presence
satellite_df['operator_country_in_codes'] = satellite_df['operator_country'].apply(lambda x: x in valid_countries)

# Optionally, print counts of countries not found
print("Launch countries not found:", satellite_df.loc[~satellite_df['launch_country_in_codes'], 'launch_country'].unique())
print("Operator countries not found:", satellite_df.loc[~satellite_df['operator_country_in_codes'], 'operator_country'].unique())

Launch countries not found: ['International' 'International Waters']
Operator countries not found: ['Multinational']


In [8]:
iso_num_map = country_simple.set_index('country_name')['numeric_code'].to_dict()
iso_alpha3_map = country_simple.set_index('country_name')['alpha3_code'].to_dict()

# Map ISO numeric and alpha3 codes for operator_country
satellite_df['operator_iso_num'] = satellite_df['operator_country'].map(iso_num_map)
satellite_df['operator_iso_alpha3'] = satellite_df['operator_country'].map(iso_alpha3_map)

# Map ISO numeric and alpha3 codes for launch_country
satellite_df['launch_iso_num'] = satellite_df['launch_country'].map(iso_num_map)
satellite_df['launch_iso_alpha3'] = satellite_df['launch_country'].map(iso_alpha3_map)

In [9]:
satellite_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7560 entries, 0 to 7559
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   satellite_name             7560 non-null   object 
 1   operator_country           7560 non-null   object 
 2   owner                      7560 non-null   object 
 3   users                      7560 non-null   object 
 4   purpose                    7560 non-null   object 
 5   class_of_orbit             7560 non-null   object 
 6   type_of_orbit              6909 non-null   object 
 7   longitude_of_geo_degrees   7557 non-null   float64
 8   perigee_km                 7553 non-null   float64
 9   apogee_km                  7553 non-null   float64
 10  eccentricity               7549 non-null   float64
 11  inclination_degrees        7556 non-null   float64
 12  period_minutes             7504 non-null   float64
 13  launch_mass_kg             7315 non-null   float

In [10]:
satellite_df.head()

Unnamed: 0,satellite_name,operator_country,owner,users,purpose,class_of_orbit,type_of_orbit,longitude_of_geo_degrees,perigee_km,apogee_km,...,launch_mass_kg,date_of_launch,expected_lifetime_yrs,launch_country,launch_country_in_codes,operator_country_in_codes,operator_iso_num,operator_iso_alpha3,launch_iso_num,launch_iso_alpha3
0,1HOPSAT-TD,United States,Hera Systems,Commercial,EARTH OBSERVATION,LEO,NON-POLAR INCLINED,0.0,566.0,576.0,...,22.0,2019,0.5,India,True,True,840.0,USA,356.0,IND
1,AAC AIS-Sat1 (Kelpie 1),United Kingdom,AAC Clyde Space,Commercial,EARTH OBSERVATION,LEO,SUN-SYNCHRONOUS,0.0,637.0,654.0,...,4.0,2023,,United States,True,True,826.0,GBR,840.0,USA
2,Aalto-1,Finland,Aalto University,Civil,TECHNOLOGY DEVELOPMENT,LEO,SUN-SYNCHRONOUS,0.0,497.0,517.0,...,4.5,2017,2.0,India,True,True,246.0,FIN,356.0,IND
3,AAt-4,Denmark,University of Aalborg,Civil,EARTH OBSERVATION,LEO,SUN-SYNCHRONOUS,0.0,442.0,687.0,...,1.0,2016,,French Guiana,True,True,208.0,DNK,254.0,GUF
4,ABS-2,Multinational,Asia Broadcast Satellite Ltd.,Commercial,COMMUNICATIONS,GEO,,75.0,35778.0,35793.0,...,6330.0,2014,15.0,French Guiana,True,False,,,254.0,GUF


In [11]:
satellite_df.to_csv('UCSDB1.csv', index=False)