In [4]:
# Solar Power
import pandas as pd

# Read the solar PV table (module price, cumulative capacity, generation,
# investment, GDP per capita) from the CSV; the path is relative to the
# kernel's working directory.
data = pd.read_csv(filepath_or_buffer='solar-pv-prices-cumulative-capacity-solar-generation.csv')

# Preview the first five rows as the cell output
data.head(n=5)


Unnamed: 0,Unnamed: 1,Code,Year,Solar photovoltaic module price,Solar photovoltaic cumulative capacity,Solar generation - TWh,Solar PV investment,Unit,GDP per capita
0,World,OWID_WRL,1983,18.48658,29.255,0.003,,USD,
1,World,OWID_WRL,1984,17.17762,46.705,0.006311,,USD,
2,World,OWID_WRL,1985,14.996445,66.455,0.011747,,USD,
3,World,OWID_WRL,1986,12.40504,89.155,0.015184,,USD,
4,World,OWID_WRL,1987,10.546382,113.655,0.010603,,USD,


In [5]:
# Require completeness only in the four columns the solar regression uses.
# A gap in an unrelated column (e.g. cumulative capacity) must not discard an
# otherwise usable observation -- the sample is small, so every row matters.
cleaned_data = data.dropna(subset=['Solar generation - TWh', 
                                   'GDP per capita', 
                                   'Solar PV investment', 
                                   'Solar photovoltaic module price'])

# `data_cleaned` is the name the modelling cell reads. It used to come from a
# blanket data.dropna(), which left `cleaned_data` above unused and dropped
# rows on columns the model never touches. Both names now describe the same
# subset-cleaned rows; .copy() keeps them independent objects.
data_cleaned = cleaned_data.copy()

In [6]:
import statsmodels.api as sm
import numpy as np

# Response (solar generation) and the three regressors, taken from the
# cleaned solar frame
y = data_cleaned['Solar generation - TWh']
X = data_cleaned[['GDP per capita', 'Solar PV investment', 'Solar photovoltaic module price']]

# Natural-log transform of both sides (log-log specification)
y_log = np.log(y)
X_log = np.log(X)

# Prepend the intercept column to the regressor matrix
X_log_const = sm.add_constant(X_log, prepend=True)

# Estimate by ordinary least squares
model = sm.OLS(endog=y_log, exog=X_log_const).fit()

# Keep the results table under its own name and show it as the cell output
model_summary = model.summary()
model_summary



0,1,2,3
Dep. Variable:,Solar generation - TWh,R-squared:,0.998
Model:,OLS,Adj. R-squared:,0.997
Method:,Least Squares,F-statistic:,1581.0
Date:,"Wed, 06 Sep 2023",Prob (F-statistic):,9e-15
Time:,02:37:53,Log-Likelihood:,15.916
No. Observations:,15,AIC:,-23.83
Df Residuals:,11,BIC:,-21.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-66.0349,8.722,-7.571,0.000,-85.233,-46.837
GDP per capita,6.1934,0.994,6.229,0.000,4.005,8.382
Solar PV investment,0.4427,0.061,7.216,0.000,0.308,0.578
Solar photovoltaic module price,-0.9330,0.082,-11.312,0.000,-1.115,-0.751

0,1,2,3
Omnibus:,0.983,Durbin-Watson:,2.234
Prob(Omnibus):,0.612,Jarque-Bera (JB):,0.532
Skew:,0.447,Prob(JB):,0.767
Kurtosis:,2.775,Cond. No.,9330.0


In [7]:
# Wind Power
# NOTE(review): in the preview below, 'Wind energy cumulative capacity(GW)'
# reads ~16941 and ~23968 for 2000-2001 but ~30.7 for 2002. That looks like a
# unit mix-up in the first rows -- confirm before using the capacity columns.
wind_data = pd.read_csv(filepath_or_buffer='Wind_power.csv')

# Preview the first five rows as the cell output
wind_data.head(n=5)

Unnamed: 0,Entity,Code,Year,Wind generation - TWh,Wind Investment,GDP per capita,Wind energy cumulative capacity(GW),Wind energy cumulative capacity(MW),Wind energy cumulative capacity(TW),Onshore Wind total installed cost(USD/km),Offshore Wind total installed cost(USD/km),Total wind installed cost
0,World,OWID_WRL,2000,31.409636,,11108.324,16941.523,16941520.0,16.941523,,,
1,World,OWID_WRL,2001,38.380093,,11215.04,23968.645,23968640.0,23.968645,,,
2,World,OWID_WRL,2002,52.381878,,11367.639,30.733092,30733.09,0.030733,,,
3,World,OWID_WRL,2003,63.2847,,11637.828,38.669957,38669.96,0.03867,,,
4,World,OWID_WRL,2004,85.618484,18900000000.0,12074.206,47.68421,47684.21,0.047684,,,


In [8]:
# Imports kept at the top of the cell so it stays runnable on its own
import numpy as np
import statsmodels.api as sm

# Keep only rows where the response and both regressors are present
filtered_wind_data = wind_data.dropna(subset=["Wind generation - TWh", "Wind Investment", "Total wind installed cost"])

# Report the size of the filtered sample. A bare `filtered_wind_data.shape`
# in the middle of a cell is evaluated and thrown away (only the last
# expression of a cell is displayed), so it is printed explicitly.
print(filtered_wind_data.shape)

# Natural-log transform of the response and the two regressors
log_wind_generation = np.log(filtered_wind_data["Wind generation - TWh"])
log_wind_investment = np.log(filtered_wind_data["Wind Investment"])
log_total_wind_installed_cost = np.log(filtered_wind_data["Total wind installed cost"])

# Regressor matrix with an intercept column.
# NOTE: `X` and `model` are rebound here, replacing the solar objects of the
# same names from the earlier cell; the solar results table is still
# available as `model_summary`.
X = sm.add_constant(pd.DataFrame({'log_wind_investment': log_wind_investment, 'log_total_wind_installed_cost': log_total_wind_installed_cost}))

# Fit the log-log model by ordinary least squares
model = sm.OLS(log_wind_generation, X).fit()

# Show the regression results as the cell output
model.summary()



0,1,2,3
Dep. Variable:,Wind generation - TWh,R-squared:,0.651
Model:,OLS,Adj. R-squared:,0.535
Method:,Least Squares,F-statistic:,5.607
Date:,"Wed, 06 Sep 2023",Prob (F-statistic):,0.0423
Time:,02:38:42,Log-Likelihood:,-0.022423
No. Observations:,9,AIC:,6.045
Df Residuals:,6,BIC:,6.637
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-18.6826,26.710,-0.699,0.510,-84.040,46.675
log_wind_investment,1.4243,0.595,2.393,0.054,-0.032,2.881
log_total_wind_installed_cost,-1.2339,1.742,-0.708,0.505,-5.497,3.029

0,1,2,3
Omnibus:,11.613,Durbin-Watson:,0.965
Prob(Omnibus):,0.003,Jarque-Bera (JB):,4.619
Skew:,-1.529,Prob(JB):,0.0993
Kurtosis:,4.721,Cond. No.,7270.0
