In [None]:
import pandas as pd
import numpy as np

# Load the starship price dataset from the Kaggle input directory
csv_path = '/kaggle/input/starship-price-dataset/starship_prices_dataset.csv'
df = pd.read_csv(csv_path)

# Preview the first and last rows, each preceded by a banner line
for banner, preview in (
    ("This the start of the data", df.head()),
    ("This is the end of the data", df.tail()),
):
    print(banner)
    print(preview)

In [None]:
# Count the missing values in each column
null_counts = df.isna().sum()
print(null_counts)

In [None]:
# Linear Regression Output Stats

import statsmodels.api as sm

# Columns used as predictors of the price
predictor_columns = ['Engine_Size', 'Weight', 'Height', 'Top_Speed']

# Predictors with an added constant column, and the response column
X = sm.add_constant(df[predictor_columns])
Y = df['Price']

# Fit ordinary least squares and print the summary table
model = sm.OLS(Y, X)
results = model.fit()
print(results.summary())

In [None]:
# HISTOGRAMS

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Columns to plot, in row-major panel order, paired with the bar colour
# for each (None keeps seaborn's default colour).
histogram_specs = [
    ('Engine_Size', None),
    ('Top_Speed', 'green'),
    ('Price', 'green'),
    ('Height', 'red'),
    ('Weight', 'purple'),
]

# 2x3 grid: five histograms plus one spare panel that is removed below
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 10))
panels = axes.ravel()

# One histogram (with KDE overlay) per column; the title is the column
# name with underscores shown as spaces.
for ax, (column, bar_color) in zip(panels, histogram_specs):
    sns.histplot(data=df, x=column, ax=ax, kde=True, color=bar_color, bins=10)
    ax.set_title(f"Distribution of {column.replace('_', ' ')}")

# Remove every panel that did not receive a histogram (the bottom-right one)
for spare_ax in panels[len(histogram_specs):]:
    fig.delaxes(spare_ax)

plt.tight_layout()
plt.show()

In [None]:
# SCATTERPLOTS

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Response plotted on the y-axis of every panel, and the predictors plotted
# on the x-axes in row-major panel order.
dependent_variable = 'Price'
independent_variables = ['Engine_Size', 'Top_Speed', 'Height', 'Weight']

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))
fig.suptitle('Relationship between Independent Variables and Price', fontsize=16)

for ax, x_var in zip(axes.ravel(), independent_variables):
    sns.scatterplot(data=df, x=x_var, y=dependent_variable, ax=ax)
    # scatter=False: regplot would otherwise draw its own copy of the points
    # on top of the scatterplot above; only the fitted line is wanted here.
    sns.regplot(
        data=df,
        x=x_var,
        y=dependent_variable,
        ax=ax,
        scatter=False,
        line_kws={"color": "red"},
    )
    ax.set_title(f'{dependent_variable} vs. {x_var}')

# Keep the subplots inside a rectangle that leaves a margin at the top for
# the suptitle (and a small one at the bottom).
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()