# Statsmodels Sheet

## Simple Linear Regression

In [None]:
import statsmodels.api as sm

# Fit an ordinary least squares model with a single regressor.
# NOTE(review): `data` is assumed to be a pandas DataFrame containing both
# columns; it is not defined in this sheet.
x1 = data[['independent_variable']]
y = data['dependent_variable']

# OLS does not add an intercept on its own; add_constant supplies the
# 'const' column (placed first by default) that the intercept is fitted on.
x = sm.add_constant(x1)
results = sm.OLS(y, x).fit()
results.summary()

# With pandas inputs, results.params and results.pvalues are Series labelled
# by column name ('const', then the regressor). Positional access must go
# through .iloc: plain integer keys such as params[0] are deprecated on a
# label-indexed Series and stop working in newer pandas releases.

# y-intercept
y_intercept = results.params.iloc[0]

# independent variable coefficient
coefficient = results.params.iloc[1]

# R2 value
R2 = results.rsquared

# p-value of the independent variable
P = results.pvalues.iloc[1]

# Regression model could be given as
yhat = (coefficient * x1) + y_intercept

## Multiple Linear Regression

In [None]:
import statsmodels.api as sm

# Fit an ordinary least squares model with two regressors.
# NOTE(review): `data` is assumed to be a pandas DataFrame containing all
# three columns; it is not defined in this sheet.
# The raw regressors are kept in x1 (same convention as the simple regression
# cell) because both add_constant and the yhat formula below read from x1.
x1 = data[['independent_variable_1', 'independent_variable_2']]
y = data['dependent_variable']

# OLS does not add an intercept on its own; add_constant supplies the
# 'const' column (placed first by default) that the intercept is fitted on.
x = sm.add_constant(x1)
results = sm.OLS(y, x).fit()
results.summary()

# With pandas inputs, results.params and results.pvalues are Series labelled
# by column name ('const', then the regressors in column order). Positional
# access must go through .iloc: plain integer keys such as params[0] are
# deprecated on a label-indexed Series and stop working in newer pandas.

# y-intercept
y_intercept = results.params.iloc[0]

# independent variables coefficients
coefficient_1 = results.params.iloc[1]
coefficient_2 = results.params.iloc[2]

# R2 value
R2 = results.rsquared

# adjusted R2 value
adj_R2 = results.rsquared_adj

# p-values of the independent variables
P1 = results.pvalues.iloc[1]
P2 = results.pvalues.iloc[2]

# Regression model could be given as
yhat = (
    (coefficient_1 * x1['independent_variable_1'])
    + (coefficient_2 * x1['independent_variable_2'])
    + y_intercept
)

## Dummy Variables

In [None]:
# Encode a two-level categorical column as a 0/1 dummy variable.
# The work is done on a copy, so raw_data keeps its original labels.
dummy_codes = {'value_1': 1, 'value_2': 0}
data = raw_data.copy()
encoded_column = data['independent_variable'].map(dummy_codes)
data['independent_variable'] = encoded_column

## Predictions

In [None]:
# pandas is needed to build the frame of new observations; nothing else in
# this sheet imports it.
import pandas as pd

# Create a new data frame, organized identically to the X used for fitting:
# the constant column first, then one column per independent variable.
# The constant is always 1 (the scalar is repeated for every row), while each
# row corresponds to one observation to predict.
new_data = pd.DataFrame({'const': 1, 'values_1': [1, 2], 'values_2': [3, 4]})
predictions = results.predict(new_data)