## Algorithms for One-hot encoding

In [0]:
# Import required libraries
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [0]:
# Read HBB and HEXA file
df5 = pd.read_excel("onehot.xlsx")

In [0]:
df5 = df5.drop(['Unnamed: 0'], axis=1)

In [0]:
# Features
X = df5.drop('Mean of correct edit', axis=1)

In [0]:
# Target variable
y = df5['Mean of correct edit']

In [0]:
# Splitting the dataset into the Training and Test dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Multiple Linear Regression Model



In [0]:
# Import Multiple Linear Regression Model
from sklearn.linear_model import LinearRegression

In [33]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [0]:
# Use the regressor's predict method on the test data
pred= regressor.predict(X)

In [0]:
# Place predictions in the dataframe
df5['Predictions'] = pd.DataFrame(data=pred)

In [0]:
results = df5[['Mean of correct edit', 'Predictions']] 

In [37]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,2.142732
1,32.843033,34.178042
2,36.210669,36.210669
3,46.204905,47.016416
4,42.342045,41.007035
5,44.501013,45.468979
6,38.488102,61.930128
7,29.187135,27.852126
8,29.95973,29.884752
9,40.690499,40.690499


In [38]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df5['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df5['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df5['Mean of correct edit'], pred)))

5.16883133499603
106.4896521570972
10.319382353469475


# Support Vector Regression

In [0]:
from sklearn.svm import SVR

In [0]:
# Use the regressor's predict method on the test data
SupportVectorRegModel = SVR()

In [48]:
SupportVectorRegModel.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [0]:
# Use the regressor's predict method on the test data
pred= SupportVectorRegModel.predict(X)

In [0]:
# Place predictions in the dataframe
df5['Predictions2'] = pd.DataFrame(data=pred)

In [0]:
results = df5[['Mean of correct edit', 'Predictions']] 

In [54]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,2.142732
1,32.843033,34.178042
2,36.210669,36.210669
3,46.204905,47.016416
4,42.342045,41.007035
5,44.501013,45.468979
6,38.488102,61.930128
7,29.187135,27.852126
8,29.95973,29.884752
9,40.690499,40.690499


In [55]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df5['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df5['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df5['Mean of correct edit'], pred)))

6.958414076681819
81.27122853076811
9.015055658772614


## Algorithms for Ordinal Vectors

In [0]:
# Read HBB and HEXA file
df6 = pd.read_excel("ordinal.xlsx")

In [0]:
df6 = df6.drop(['Unnamed: 0'], axis=1)

In [0]:
# Features
X = df6.drop('Mean of correct edit', axis=1)

In [0]:
# Target variable
y = df6['Mean of correct edit']

In [0]:
# Splitting the dataset into the Training and Test dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Multiple Linear Regression Model



In [67]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [0]:
# Use the regressor's predict method on the test data
pred= regressor.predict(X)

In [0]:
# Place predictions in the dataframe
df6['Predictions'] = pd.DataFrame(data=pred)

In [0]:
results = df6[['Mean of correct edit', 'Predictions']] 

In [71]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,16.156656
1,32.843033,34.258183
2,36.210669,35.273682
3,46.204905,45.551852
4,42.342045,41.863881
5,44.501013,41.931912
6,38.488102,46.22048
7,29.187135,28.948428
8,29.95973,29.963926
9,40.690499,40.242096


In [72]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df6['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df6['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df6['Mean of correct edit'], pred)))

3.0556489544309033
25.249808033413533
5.024918709134859


# Support Vector Regression

In [0]:
# Use the regressor's predict method on the test data
SupportVectorRegModel = SVR()

In [74]:
SupportVectorRegModel.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [0]:
# Use the regressor's predict method on the test data
pred= SupportVectorRegModel.predict(X)

In [0]:
# Place predictions in the dataframe
df6['Predictions2'] = pd.DataFrame(data=pred)

In [0]:
results = df6[['Mean of correct edit', 'Predictions']] 

In [78]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,16.156656
1,32.843033,34.258183
2,36.210669,35.273682
3,46.204905,45.551852
4,42.342045,41.863881
5,44.501013,41.931912
6,38.488102,46.22048
7,29.187135,28.948428
8,29.95973,29.963926
9,40.690499,40.242096


In [79]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df6['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df6['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df6['Mean of correct edit'], pred)))

10.77534987241704
167.6173344293209
12.946711336448375


## Algortihms for k-mers (k=3)

In [0]:
# Read HBB and HEXA file
df7 = pd.read_excel('kmers.xlsx')

In [0]:
df7 = df7.drop(['Unnamed: 0','pegRNA', '3extension'], axis=1)

In [0]:
# Features
X = df7.drop('Mean of correct edit', axis=1)

In [0]:
# Target variable
y = df7['Mean of correct edit']

In [0]:
# Splitting the dataset into the Training and Test dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Multiple Linear Regression Model

In [100]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [0]:
# Use the regressor's predict method on the test data
pred= regressor.predict(X)

In [0]:
# Place predictions in the dataframe
df7['Predictions'] = pd.DataFrame(data=pred)

In [0]:
results = df7[['Mean of correct edit', 'Predictions']] 

In [104]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,40.46849
1,32.843033,34.178042
2,36.210669,36.210669
3,46.204905,47.016416
4,42.342045,41.007035
5,44.501013,45.468979
6,38.488102,44.882812
7,29.187135,27.852126
8,29.95973,29.884752
9,40.690499,40.690499


In [105]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df7['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df7['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df7['Mean of correct edit'], pred)))

2.5091480420172187
14.0857105280476
3.753093461139437


# Support Vector Regression

In [0]:
# Use the regressor's predict method on the test data
SupportVectorRegModel = SVR()

In [107]:
SupportVectorRegModel.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [0]:
# Use the regressor's predict method on the test data
pred= SupportVectorRegModel.predict(X)

In [0]:
# Place predictions in the dataframe
df7['Predictions2'] = pd.DataFrame(data=pred)

In [0]:
results = df7[['Mean of correct edit', 'Predictions']] 

In [111]:
# Print the actual values and the model's predictions
results

Unnamed: 0,Mean of correct edit,Predictions
0,40.982414,40.46849
1,32.843033,34.178042
2,36.210669,36.210669
3,46.204905,47.016416
4,42.342045,41.007035
5,44.501013,45.468979
6,38.488102,44.882812
7,29.187135,27.852126
8,29.95973,29.884752
9,40.690499,40.690499


In [112]:
import sklearn.metrics as metrics

# Calculate MAE, MSE, RMSE
print(metrics.mean_absolute_error(df7['Mean of correct edit'],pred))
print(metrics.mean_squared_error(df7['Mean of correct edit'], pred))
print(np.sqrt(metrics.mean_squared_error(df7['Mean of correct edit'], pred)))

10.430284437641834
155.4385810973711
12.467500996485667
