# Linear regression of ALFAH-SNP combined dataset

### Author: Mareli Mostert
### Date executed: 31 October 2023

#### Adapted from: How to perform regression models ~ Author: Vinet Coetzee 
#### Reference: https://github.com/vcoetzee/how-to-files/blob/main/how-to-regression-models.ipynb

## 1. Install and import the necessary packages and libraries

Different regression models in Python include:

* Multiple linear regression (forward method)
* Regression tree
* Random forest
* Gradient boosting tree
* Support vector regression

For this analysis it was decided to perform forward linear regressions, i.e. multiple linear regression (forward method).

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import math
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import svm
from tabulate import tabulate
from sklearn import tree
from sklearn.model_selection import RandomizedSearchCV

## 2. Read excel file into Pandas dataframe

Note: The 'Unnamed' column from the subset_data_cleaned.xlsx file was deleted before it was imported.

In [2]:
# Lift pandas' display limits so that every row and column is printed in full
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Load the cleaned and transformed dataset (Excel file) into a pandas dataframe
# Replace 'C:/Users/...' with the file path of the cleaned and transformed dataset
df = pd.read_excel('C:/Users/...')

# Preview the first rows to confirm that the file loaded correctly
df.head()

Unnamed: 0,index,id,age,sex,body_fat,muscle_mass,rs174547,rs9939609,rs1558902,rs731236,rs1801282,rs964184,rs1544410,rs780094,rs1535,rs4506565,rs10830963,rs7903146,rs174537,rs174550,rs8192678,rs4740619,rs12255372,rs3734398,rs2236212,rs3798713,rs174548,rs174546,rs968567,rs1501299,rs1801133,rs4961,rs2241766,rs670,rs1799883,rs10182181,rs6265,rs3764261,rs13702,facility
0,0,1,20.8,1,3.2,51.7,3,2,3,2,1,1,2,3,1,1,1,1,1,3,3,1,2,3,3,3,1,1,1,3,1,1,2,2,3,3,3,2,2,upsala
1,1,2,20.6,0,20.3,36.2,3,2,3,3,1,1,3,3,1,2,1,2,1,3,3,2,3,3,3,2,1,1,1,3,1,2,3,1,3,3,3,1,2,upsala
2,2,3,19.9,0,20.0,33.3,2,2,3,3,1,1,3,2,2,3,1,3,2,2,3,2,2,3,3,3,2,2,1,2,1,1,3,1,2,2,3,2,2,upsala
3,3,4,23.1,0,39.6,42.9,3,2,3,3,1,2,3,2,1,2,1,1,1,3,3,2,3,3,3,3,1,1,1,3,1,1,3,1,3,3,3,2,3,upsala
4,4,5,22.5,0,24.6,42.0,3,2,3,3,1,1,2,2,2,2,1,2,1,3,3,3,1,3,3,3,1,1,1,2,1,1,3,1,3,3,3,2,3,upsala


## 3. Feature selection

Use individual correlations to get an indication of which SNPs are more highly correlated with each of the two dependent variables, namely, body fat and muscle mass.

In [3]:
### Body fat ###
# Rank every numeric column by its correlation with the dependent variable
# Dependent variable: body_fat
# Independent variables: snp
# numeric_only=True states explicitly that non-numeric columns (e.g. the text
# column 'facility') are left out of the correlation, instead of relying on
# pandas dropping them implicitly.
df.corrwith(df['body_fat'], numeric_only=True).sort_values(ascending=False)

  df.corrwith(df['body_fat']).sort_values(ascending=False)


body_fat       1.000000
rs4506565      0.135116
rs7903146      0.107394
rs8192678      0.102634
age            0.097484
rs731236       0.095323
rs4740619      0.095192
rs9939609      0.074218
index          0.072651
id             0.072591
rs780094       0.054647
rs1801133      0.054359
rs1799883      0.051610
rs6265         0.050666
rs964184       0.047953
rs174546       0.044992
rs1801282      0.031082
rs13702        0.021053
rs1544410      0.016535
rs174537       0.000504
rs968567      -0.000235
rs10830963    -0.008631
rs1501299     -0.013357
rs2241766     -0.021300
rs1558902     -0.022499
rs10182181    -0.026700
rs3734398     -0.042221
rs670         -0.043323
rs174548      -0.050999
rs1535        -0.051277
rs12255372    -0.059041
rs174547      -0.062740
rs174550      -0.062740
rs2236212     -0.077501
rs3798713     -0.085987
rs3764261     -0.114982
rs4961        -0.168539
muscle_mass   -0.196345
sex           -0.270180
dtype: float64

In [4]:
### Muscle mass ###
# Rank every numeric column by its correlation with the dependent variable
# Dependent variable: muscle_mass
# Independent variables: snp
# numeric_only=True states explicitly that non-numeric columns (e.g. the text
# column 'facility') are left out of the correlation, instead of relying on
# pandas dropping them implicitly.
df.corrwith(df['muscle_mass'], numeric_only=True).sort_values(ascending=False)

  df.corrwith(df['muscle_mass']).sort_values(ascending=False)


muscle_mass    1.000000
sex            0.307647
rs2236212      0.131490
rs12255372     0.126092
rs3734398      0.110453
rs968567       0.108330
age            0.108194
rs174548       0.084966
rs3798713      0.061814
rs1801282      0.049732
rs670          0.044786
rs731236       0.042903
rs174546       0.039875
rs1535         0.039338
rs174537       0.039207
rs1501299      0.034023
rs9939609      0.030731
rs780094       0.029470
id             0.028459
index          0.026846
rs13702        0.025180
rs4740619      0.021917
rs3764261      0.019404
rs1544410      0.013715
rs964184      -0.001726
rs1558902     -0.027519
rs10830963    -0.030431
rs10182181    -0.034104
rs7903146     -0.035086
rs174550      -0.035741
rs174547      -0.035741
rs4961        -0.035978
rs1799883     -0.038839
rs4506565     -0.064225
rs2241766     -0.077028
rs6265        -0.077495
rs8192678     -0.095558
rs1801133     -0.098419
body_fat      -0.196345
dtype: float64

In [5]:
### SNPs ###
# snp1 = 'rs174547'
# snp2 = 'rs9939609'
# snp3 = 'rs1558902'
# snp4 = 'rs731236'
# snp5 = 'rs1801282'
# snp6 = 'rs964184'
# snp7 = 'rs1544410'
# snp8 = 'rs780094'
# snp9 = 'rs1535'
# snp10 = 'rs4506565'
# snp11 = 'rs10830963'
# snp12 = 'rs7903146'
# snp13 = 'rs174537'
# snp14 = 'rs174550'
# snp15 = 'rs8192678'
# snp16 = 'rs4740619'
# snp17 = 'rs12255372'
# snp18 = 'rs3734398'
# snp19 = 'rs2236212'
# snp20 = 'rs3798713'
# snp21 = 'rs174548'
# snp22 = 'rs174546'
# snp23 = 'rs968567'
# snp24 = 'rs1501299'
# snp25 = 'rs1801133'
# snp26 = 'rs4961'
# snp27 = 'rs2241766'
# snp28 = 'rs670'
# snp29 = 'rs1799883'
# snp30 = 'rs10182181'
# snp31 = 'rs6265'
# snp32 = 'rs3764261'
# snp33 = 'rs13702'

## 4. Forward linear regressions

Sex had the strongest correlation (largest absolute value) with both body fat and muscle mass.

Note: Body composition is sex-influenced; therefore, the coefficient of determination (R^2) was calculated for sex with body fat and muscle mass, respectively.

In [6]:
### Format of linear regression queries for each individual snp ###
# Reference 1: https://realpython.com/linear-regression-in-python/
# Reference 2: https://howtodoinjava.com/python-examples/python-print-to-file/

## Defining values (.values creates a numpy array)
## Note: x is the independent variable; y is the dependent variable
# x = df[['rs174547']].values
# y = df[['body_fat']].values 
## Note: if an error message is displayed, the array with the values may be the incorrect shape, thus:
# snp1 = x.reshape((-1, 1))

## Perform the linear regression
# lr1 = LinearRegression()
# lr1.fit(snp1, y)

## Evaluate the model's predictions
# y_pred_lr1=lr1.predict(x)

## Print the results to output file 
# r_sq = lr1.score(x, y)
# lr1_MSE = mean_squared_error(y, y_pred_lr1)
# sourceFile = open('body_fat.txt', 'w')
# print('SNP1', file = sourceFile)
# print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
# print(f"intercept: {lr1.intercept_}", file = sourceFile)
# print(f"slope: {lr1.coef_}", file = sourceFile), 
# print('MSE:', lr1_MSE, file = sourceFile)
# sourceFile.close()

## Body Fat

In [7]:
### Body Fat ###
# Fits one linear regression per model with body_fat as the dependent variable
# and appends each model's R2, intercept, slope(s) and MSE to the report file.
# The first model uses sex alone; every following model uses sex plus one SNP.
# Note: the stated sourceFile (body_fat_snp_sex_linear_regressions) was created before the code was executed.

# SNP columns in reporting order: the SNP at position n is labelled 'SNP<n>'.
body_fat_snps = [
    'rs174547',    # snp1
    'rs9939609',   # snp2
    'rs1558902',   # snp3
    'rs731236',    # snp4
    'rs1801282',   # snp5
    'rs964184',    # snp6
    'rs1544410',   # snp7
    'rs780094',    # snp8
    'rs1535',      # snp9
    'rs4506565',   # snp10
    'rs10830963',  # snp11
    'rs7903146',   # snp12
    'rs174537',    # snp13
    'rs174550',    # snp14
    'rs8192678',   # snp15
    'rs4740619',   # snp16
    'rs12255372',  # snp17
    'rs3734398',   # snp18
    'rs2236212',   # snp19
    'rs3798713',   # snp20
    'rs174548',    # snp21
    'rs174546',    # snp22
    'rs968567',    # snp23
    'rs1501299',   # snp24
    'rs1801133',   # snp25
    'rs4961',      # snp26
    'rs2241766',   # snp27
    'rs670',       # snp28
    'rs1799883',   # snp29
    'rs10182181',  # snp30
    'rs6265',      # snp31
    'rs3764261',   # snp32
    'rs13702',     # snp33
]

# (label written to the report, independent-variable columns) for every model.
body_fat_models = [('sex', ['sex'])]
body_fat_models += [
    (f'SNP{number}', ['sex', snp])
    for number, snp in enumerate(body_fat_snps, start=1)
]

# Dependent variable (.values creates a numpy array); identical for all models.
y = df[['body_fat']].values

# Open the report once in append mode; the with-block closes the file even if
# one of the fits raises, so no file handle is leaked.
with open('body_fat_snp_sex_linear_regressions.txt', 'a') as sourceFile:
    for label, predictors in body_fat_models:
        x = df[predictors].values
        lr = LinearRegression()
        lr.fit(x, y)
        # Predictions and scores are computed on the same data the model was fitted on.
        y_pred_lr = lr.predict(x)
        r_sq = lr.score(x, y)
        lr_MSE = mean_squared_error(y, y_pred_lr)
        print(label, file=sourceFile)
        print(f"coefficient of determination(R2): {r_sq}", file=sourceFile)
        print(f"intercept: {lr.intercept_}", file=sourceFile)
        print(f"slope: {lr.coef_}", file=sourceFile)
        print('MSE:', lr_MSE, file=sourceFile)

## Muscle Mass

In [8]:
### Muscle Mass ###
# Fits one linear regression per model with muscle_mass as the dependent
# variable and appends each model's R2, intercept, slope(s) and MSE to the
# report file. The first model uses sex alone; every following model uses sex
# plus one SNP.
# Note: the stated sourceFile (muscle_mass_snp_sex_linear_regressions) was created before the code was executed.

# First twelve SNP columns in reporting order: position n is labelled 'SNP<n>'.
# The models from SNP13 onwards are handled by the statements after this loop.
muscle_mass_snps = [
    'rs174547',    # snp1
    'rs9939609',   # snp2
    'rs1558902',   # snp3
    'rs731236',    # snp4
    'rs1801282',   # snp5
    'rs964184',    # snp6
    'rs1544410',   # snp7
    'rs780094',    # snp8
    'rs1535',      # snp9
    'rs4506565',   # snp10
    'rs10830963',  # snp11
    'rs7903146',   # snp12
]

# (label written to the report, independent-variable columns) for every model.
muscle_mass_models = [('sex', ['sex'])]
muscle_mass_models += [
    (f'SNP{number}', ['sex', snp])
    for number, snp in enumerate(muscle_mass_snps, start=1)
]

# Dependent variable (.values creates a numpy array); identical for all models.
y = df[['muscle_mass']].values

# Open the report once in append mode; the with-block closes the file even if
# one of the fits raises, so no file handle is leaked.
with open('muscle_mass_snp_sex_linear_regressions.txt', 'a') as sourceFile:
    for label, predictors in muscle_mass_models:
        x = df[predictors].values
        lr = LinearRegression()
        lr.fit(x, y)
        # Predictions and scores are computed on the same data the model was fitted on.
        y_pred_lr = lr.predict(x)
        r_sq = lr.score(x, y)
        lr_MSE = mean_squared_error(y, y_pred_lr)
        print(label, file=sourceFile)
        print(f"coefficient of determination(R2): {r_sq}", file=sourceFile)
        print(f"intercept: {lr.intercept_}", file=sourceFile)
        print(f"slope: {lr.coef_}", file=sourceFile)
        print('MSE:', lr_MSE, file=sourceFile)

# snp13 = 'rs174537'
x = df[['sex', 'rs174537']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP13', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp14 = 'rs174550'
x = df[['sex', 'rs174550']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP14', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp15 = 'rs8192678'
x = df[['sex', 'rs8192678']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP15', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp16 = 'rs4740619'
x = df[['sex', 'rs4740619']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP16', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp17 = 'rs12255372'
x = df[['sex', 'rs12255372']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP17', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp18 = 'rs3734398'
x = df[['sex', 'rs3734398']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP18', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp19 = 'rs2236212'
x = df[['sex', 'rs2236212']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP19', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp20 = 'rs3798713'
x = df[['sex', 'rs3798713']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP20', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp21 = 'rs174548'
x = df[['sex', 'rs174548']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP21', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp22 = 'rs174546'
x = df[['sex', 'rs174546']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP22', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp23 = 'rs968567'
x = df[['sex', 'rs968567']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP23', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp24 = 'rs1501299'
x = df[['sex', 'rs1501299']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP24', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp25 = 'rs1801133'
x = df[['sex', 'rs1801133']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP25', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp26 = 'rs4961'
x = df[['sex', 'rs4961']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP26', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp27 = 'rs2241766'
x = df[['sex', 'rs2241766']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP27', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp28 = 'rs670'
x = df[['sex', 'rs670']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP28', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp29 = 'rs1799883'
x = df[['sex', 'rs1799883']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP29', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp30 = 'rs10182181'
x = df[['sex', 'rs10182181']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP30', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp31 = 'rs6265'
x = df[['sex', 'rs6265']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP31', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp32 = 'rs3764261'
x = df[['sex', 'rs3764261']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP32', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()

# snp33 = 'rs13702'
x = df[['sex', 'rs13702']].values
y = df[['muscle_mass']].values 
lr = LinearRegression()
lr.fit(x, y)
y_pred_lr=lr.predict(x)
r_sq = lr.score(x, y)
lr_MSE = mean_squared_error(y, y_pred_lr)
sourceFile = open('muscle_mass_snp_sex_linear_regressions.txt', 'a')
print('SNP33', file = sourceFile)
print(f"coefficient of determination(R2): {r_sq}", file = sourceFile)
print(f"intercept: {lr.intercept_}", file = sourceFile)
print(f"slope: {lr.coef_}", file = sourceFile), 
print('MSE:', lr_MSE, file = sourceFile)
sourceFile.close()