In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm

In [None]:
# Load the dataset from CSV. The path is relative, so it is resolved against
# the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer="FDatasets.csv")

In [None]:
# Map the verbose CSV headers onto short identifiers used by the rest of the
# notebook.
# NOTE(review): the first key has an opening '(' but no closing ')'. Confirm it
# matches the CSV header exactly -- DataFrame.rename ignores keys that are not
# present, so a mismatch would leave that column un-renamed.
header_aliases = {
    'Crude Oil Production BBL per Day (Thousands': 'Crude_Oil_Production',
    'Natural Gas Production Billion Cubic Feet per Day': 'Gas_Production',
    'GDP Billion USD': 'GDP',
}

# Strip leading/trailing whitespace from every header before looking up aliases.
df.columns = df.columns.str.strip()
df.rename(columns=header_aliases, inplace=True)

In [None]:
# Convert to numeric and drop missing values

In [None]:
# Coerce the three analysis columns to numeric. With errors='coerce', any value
# that cannot be parsed as a number becomes NaN instead of raising.
numeric_columns = ['Crude_Oil_Production', 'Gas_Production', 'GDP']
rows_before = len(df)
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# dropna() without a subset removes a row when ANY of its columns is missing,
# not only the three columns coerced above.
df.dropna(inplace=True)

# Coercion followed by dropna discards rows silently; report how many were lost
# so unexpected data loss (e.g. badly formatted numbers) is visible.
rows_dropped = rows_before - len(df)
if rows_dropped:
    print(f"Dropped {rows_dropped} of {rows_before} rows with missing or non-numeric values.")

In [None]:
# NumPy Operations: summary statistics for oil production, gas production, and GDP

In [None]:
# Pull each analysis column out once so the statistics below read cleanly.
oil_series = df['Crude_Oil_Production']
gas_series = df['Gas_Production']
gdp_series = df['GDP']

# Summary statistics computed with NumPy's top-level reducers.
mean_oil = np.mean(oil_series)
max_oil = np.max(oil_series)
median_gas = np.median(gas_series)
std_gdp = np.std(gdp_series)  # np.std defaults to ddof=0 (population standard deviation)
min_gdp = np.min(gdp_series)

# Emit the whole report in one call; one line per statistic, then a separator.
report_lines = [
    "NumPy Operations:",
    f"Mean Crude Oil Production: {mean_oil}",
    f"Median Gas Production: {median_gas}",
    f"Standard Deviation of GDP: {std_gdp}",
    f"Max Crude Oil Production: {max_oil}",
    f"Min GDP: {min_gdp}",
    "-" * 50,
]
print("\n".join(report_lines))

In [None]:
# SciPy Operation: Pearson correlation between crude oil production and GDP

In [None]:
# Pearson correlation between crude oil production and GDP, with its p-value.
oil_values = df['Crude_Oil_Production']
gdp_values = df['GDP']
corr_coef, p_value = stats.pearsonr(oil_values, gdp_values)

# One print call; sep="\n" puts each item on its own line.
print(
    "SciPy Pearson Correlation:",
    f"Correlation coefficient: {corr_coef}",
    f"P-value: {p_value}",
    "-" * 50,
    sep="\n",
)

In [None]:
# Statsmodels Operation: OLS regression of GDP on crude oil production

In [None]:
# Ordinary least squares: GDP is the response, crude oil production the
# single predictor.
y = df['GDP']
# add_constant adds a column of ones so the fit includes an intercept term.
X = sm.add_constant(df['Crude_Oil_Production'])

ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()

# Header, the full regression table, then a separator -- one item per line.
print("Statsmodels OLS Regression Summary:", model.summary(), "-" * 50, sep="\n")

In [None]:
# Time Series of Oil Production

In [None]:
# Line chart of crude oil production by year, one coloured line per country.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x='Year', y='Crude_Oil_Production', hue='Country', ax=ax)

ax.set_title('Crude Oil Production Over Time')
ax.set_xlabel('Year')
ax.set_ylabel('Thousands of Barrels per Day')

# Rotate the x-axis tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)

fig.tight_layout()
plt.show()

In [None]:
# Scatter with Regression Line

In [None]:
# Scatter of GDP against crude oil production with seaborn's fitted
# regression line overlaid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=df, x='Crude_Oil_Production', y='GDP', ax=ax)

ax.set(
    title='Crude Oil Production vs GDP',
    xlabel='Crude Oil Production (Thousands of BBL/day)',
    ylabel='GDP (Billion USD)',
)

fig.tight_layout()
plt.show()

In [None]:
# Correlation Heatmap

In [None]:
# Pairwise correlations between the three numeric analysis columns
# (DataFrame.corr uses Pearson correlation by default).
analysis_columns = ['Crude_Oil_Production', 'Gas_Production', 'GDP']
corr_matrix = df[analysis_columns].corr()

# Annotated heatmap; each cell shows its coefficient to two decimals.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')

fig.tight_layout()
plt.show()
