In [73]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from astropy import units as u
from astropy import constants as const

# Load the raw table and report its shape as read from disk.
df = pd.read_csv('../../course_assignments/homework9/solar_system.csv')
print(df.shape)

# Use the "Attribute" column as row labels, then flip rows and columns.
df = df.set_index("Attribute").T
# Name the new row index "Planet" and move it into an ordinary column,
# leaving a default integer index behind.
df = df.rename_axis(index="Planet").reset_index()
print(df.shape)

# Clear the leftover "Attribute" label from the column axis.
df.columns.name = None
print(df.shape)

# 5 Answer the following:

# b). Before transposing, there were 20 rows and 11 columns. After transposing, there were 10 rows and 21 columns.

# c). The line 'df.set_index("Attribute").T' does two things. It first makes the "Attribute" column of the data frame the new index,
# so that every row is labeled by its attribute name. It then transposes the data frame, swapping the rows and the columns.
# The following line, 'df.index.name = "Planet"', gives the row index the name "Planet"; it does not add a row or a column,
# which is why it does not change the shape. The next line, 'df.reset_index(inplace=True)', moves that index into an ordinary column called "Planet"
# and replaces it with a default numerical index, hence why there is now a shape of (10,21). The final line, 'df.columns.name = None', removes the leftover "Attribute" label
# from the column index, so it no longer appears above the row labels when the data frame is printed.

# d). The new shape is now (10,21): setting "Attribute" as the index removed one of the 11 original columns, so the transposed data has 10 rows,
# and resetting the index turned the planet names into a regular "Planet" column, hence why there is a 21st column alongside the 20 attribute columns.

# e).

# List the column labels of the reshaped frame (used to answer part f below).
print(df.columns)

# f). There are 16 columns with units: Mass (10^24 kg), Diameter (km), Density (kg/m^3), Gravity (m/s^2), Escape Velocity (km/s), Rotation Period (hours),
# Length of Day (hours), Distance from Sun (10^6 km), Perihelion (10^6 km), Aphelion (10^6 km), Orbital Period (days), Orbital Velocity (km/s), Orbital Inclination (deg),
# Obliquity to Orbit (deg), Mean Temperature (C), and Surface Pressure (bars).

# There are 5 columns without units: Planet, Orbital Eccentricity, Number of Moons, Ring System, and Global Magnetic Field.

# Show the whole reshaped table.
print(df)



# --- 2.1.2 Investigate Data Types

# Report which Python types each column actually holds. The result has to be
# printed explicitly: a bare expression inside a for loop is evaluated and
# thrown away, so nothing would be shown otherwise.
for col in df:
    observed = df[col].apply(type).unique()
    print(f"{col}: {[kind.__name__ for kind in observed]}")

# Column-level dtypes as pandas reports them.
print(df.dtypes)

# 3 a). The values should be a mix of strings and numbers (floats and integers). Instead, every column is reported as dtype 'object'.
# 3 b). 

# --- 2.1.3 Convert Strings to Numbers

# Columns that hold text and must stay untouched.
skip = ['Planet', 'Ring System?', 'Global Magnetic Field?']

# Every other column is parsed as a number; entries that cannot be parsed
# become NaN because of errors="coerce".
numeric_columns = [name for name in df if name not in skip]
for name in numeric_columns:
    df[name] = pd.to_numeric(df[name], errors="coerce")

print(df.dtypes)

# 3. a). The three different data types now are object, float64, and int64.
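# 3 b). The data type 'object' is pandas' generic type for arbitrary Python objects (here, text strings), the data type 'float64' is a 64-bit floating-point (decimal) number, and the data type 'int64' is a 64-bit signed integer.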
# 3 c). The columns that are still objects are 'Planet', 'Ring System?', and 'Global Magnetic Field?', because they were
# the three that were skipped during the for loop to turn columns into numeric data types.


# --- 2.2 Astropy Units

def attach_units(frame=None, old_col="Distance from Sun (10^6 km)",
                 new_col="Distance from Sun (km)", unit=None):
    """Multiply one column by a unit and rename it, modifying the frame in place.

    Called with no arguments, this converts the module-level ``df`` column
    "Distance from Sun (10^6 km)" by multiplying every entry by
    ``1e6 * u.km`` and renaming the column to "Distance from Sun (km)".

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the module-level ``df``.
    old_col : str
        Name of the existing column to convert.
    new_col : str
        Name the column carries after conversion.
    unit : optional
        Factor each entry is multiplied by. Defaults to ``1e6 * u.km``.

    Raises
    ------
    KeyError
        If ``old_col`` is not a column of the frame.
    """
    # Resolve defaults at call time so the function can be defined before
    # ``df`` exists and so callers may supply their own frame or unit.
    if frame is None:
        frame = df
    if unit is None:
        unit = 1e6 * u.km

    frame[old_col] = [val * unit for val in frame[old_col]]
    frame.rename(columns={old_col: new_col}, inplace=True)

# Apply the conversion to the module-level DataFrame (attach_units modifies
# df in place and renames the column).
attach_units()

# Show the converted column under its new name.
print(df["Distance from Sun (km)"])

# Semi-major axis = mean of perihelion and aphelion, computed on raw arrays.
# NOTE(review): both inputs are in 10^6 km, so the result is too, but the new
# column name carries no unit; confirm whether it should.
perihelion = df["Perihelion (10^6 km)"].to_numpy()
aphelion = df["Aphelion (10^6 km)"].to_numpy()
semi_major = (perihelion + aphelion) / 2

# Place the new column immediately to the right of the aphelion column.
insert_at = df.columns.get_loc('Aphelion (10^6 km)') + 1
df.insert(insert_at, 'Semi Major Axis', semi_major)

print(df.columns)
semi_major_column = df["Semi Major Axis"]
print(semi_major_column)
print(type(semi_major_column.iloc[0]))

# Convert the orbital period from days to years through astropy, storing the
# bare numbers (no unit objects) back in the frame.
df["Orbital Period (days)"] = (df["Orbital Period (days)"].values * u.day).to(u.yr).value

# The values are now in years, so the column label has to follow.
df.rename(columns={"Orbital Period (days)": "Orbital Period (years)"}, inplace=True)

print(df.columns)
print(df["Orbital Period (years)"])

# Row 2 is Earth in this table. The value is pulled into a variable first:
# reusing double quotes inside a double-quoted f-string is a SyntaxError on
# Python versions before 3.12.
earth_period = df["Orbital Period (years)"].iloc[2]
print(f"Earth's Orbital Period in years: {earth_period:.4f}")

# --- Convert Distance Columns (km -> AU)
# (astropy is already imported at the top of the file; repeated here as in the original.)
from astropy import units as u
from astropy import constants as const
# The astronomical unit (AU) is a standard unit of length, roughly the average
# distance between the Earth and the Sun. It is the usual scale for distances
# inside the solar system.

print(const.au)
# There are 149597870.7 kilometers in one AU
AU_inKm = (const.au).to(u.km)


def _to_au(values, km_per_unit=1.0):
    """Return ``values`` as a list of plain floats expressed in AU.

    Each entry is multiplied by ``km_per_unit`` and read as kilometres; an
    entry that is already an astropy length Quantity is converted from its
    own unit instead. The conversion goes through astropy so the result is
    a bare number, not a Quantity that still carries a km unit.
    """
    return [
        float(u.Quantity(value * km_per_unit, u.km).to_value(u.AU))
        for value in values
    ]


# old column name -> (new column name, kilometres per stored unit)
_AU_CONVERSIONS = {
    "Diameter (km)": ("Diameter (AU)", 1.0),
    "Distance from Sun (km)": ("Distance from Sun (AU)", 1.0),
    "Perihelion (10^6 km)": ("Perihelion (AU)", 1e6),
    "Aphelion (10^6 km)": ("Aphelion (AU)", 1e6),
}

for old_name, (new_name, km_per_unit) in _AU_CONVERSIONS.items():
    df[old_name] = _to_au(df[old_name], km_per_unit)

# Rename all converted columns in one pass so labels match the new unit.
df.rename(
    columns={old_name: new_name for old_name, (new_name, _) in _AU_CONVERSIONS.items()},
    inplace=True,
)
# NOTE(review): the 'Semi Major Axis' column inserted earlier is not converted
# here and stays in 10^6 km; confirm whether it should also be in AU.

# Spot-check the converted values for row 2 (Earth in this table).
for new_name, _ in _AU_CONVERSIONS.values():
    print(df[new_name].iloc[2])

# 2.4 Save Your Results
df.to_csv("units.csv", index=False)



(20, 11)
(10, 21)
(10, 21)
Index(['Planet', 'Mass (10^24kg)', 'Diameter (km)', 'Density (kg/m^3)',
       'Gravity (m/s^2)', 'Escape Velocity (km/s)', 'Rotation Period (hours)',
       'Length of Day (hours)', 'Distance from Sun (10^6 km)',
       'Perihelion (10^6 km)', 'Aphelion (10^6 km)', 'Orbital Period (days)',
       'Orbital Velocity (km/s)', 'Orbital Inclination (deg)',
       'Orbital Eccentricity', 'Obliquity to Orbit (deg)',
       'Mean Temperature (C)', 'Surface Pressure (bars)', 'Number of Moons',
       'Ring System?', 'Global Magnetic Field?'],
      dtype='object')
    Planet Mass (10^24kg) Diameter (km) Density (kg/m^3) Gravity (m/s^2)  \
0  Mercury          0.330          4879             5429             3.7   
1    Venus           4.87         12104             5243             8.9   
2    Earth           5.97         12756             5514             9.8   
3     Moon          0.073          3475             3340             1.6   
4     Mars          0.642     