# Transpiling a Python LR model to C

## Initializing the virtual env and dependencies

In [3]:
# Run the project's setup script through the shell; the recorded output below
# shows pip collecting pinned packages such as scikit-learn==1.0.1 and joblib==1.1.0.
! ./start.sh

Collecting scikit-learn==1.0.1
  Using cached scikit-learn-1.0.1.tar.gz (6.6 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting joblib==1.1.0
  Using cached joblib-1.1.0-py2.py3-none-any.whl (306 kB)
Collecting scipy>=1.1.0
  Using cached scipy-1.7.3-1-cp39-cp39-macosx_12_0_arm64.whl (27.0 MB)
Collecting threadpoolctl>=2.0.0
  Using cached threadpoolctl-3.0.0-py3-none-any.whl (14 kB)
Building wheels for collected packages: scikit-learn
  Building wheel for scikit-learn (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-learn: filename=scikit_learn-1.0.1-cp39-cp39-macosx_11_0_arm64.whl size=6815553 sha256=2b1045b39659383ac1028959dfcf2fba86e8261d30b271cbed7d58e6756029ca
  Stored in directory: /Users/redasahrane/Library/Caches/pip/wheels/52/9c/a6/ec245545963d3bd597e2247564829b0ce1ab4d62db030c3a93
Successfully built scikit-learn
Installing col

## Saving the LR model with joblib

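A linear regression is fitted on the Iris dataset, predicting petal length from sepal length and sepal width, and the fitted model is saved to disk.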

In [6]:
import pandas as pd
import joblib

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the Iris measurements: the two sepal columns are the predictors and
# petal length is the regression target.
df = pd.read_csv("iris.csv")
X = df[["sepal.length", "sepal.width"]]
y = df["petal.length"]

# Hold out 10% of the rows; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=42,
)

# Fit the linear model on the training split, then persist it with joblib.
lr = LinearRegression().fit(X_train, y_train)
joblib.dump(lr, "lr_model.joblib")

['lr_model.joblib']

## Loading the saved model

In [19]:
# Reload the persisted estimator and pull out the parameters the generated
# C code needs: the intercept and the coefficient array with its length.
lr_model = joblib.load("lr_model.joblib")

bias = lr_model.intercept_
thetas = lr_model.coef_
n_thetas = len(thetas)

Converting the thetas to a C array

In [22]:
# Render each coefficient as a C float literal (trailing "f"), separate them
# with commas, and wrap the list in a fixed-size array declaration.
thetas_str = ", ".join(str(theta) + "f" for theta in thetas)
thetas_str_c = f"float thetas[{n_thetas}] = {{ {thetas_str} }};"
thetas_str_c

'float thetas[2] = { 1.7857587975297526f, -1.3404875694395608f };'

Converting the input test set to C array initializer rows

In [30]:
# Turn every test row into a brace-enclosed, comma-separated C initializer.
# Each row ends with "},\n" so the rows can be placed inside a 2-D array literal.
to_predict = X_test.to_numpy()
features = "".join(
    "{" + ", ".join(str(value) for value in row) + "},\n"
    for row in to_predict
)

print(features)

{6.1, 2.8},
{5.7, 3.8},
{7.7, 2.6},
{6.0, 2.9},
{6.8, 2.8},
{5.4, 3.4},
{5.6, 2.9},
{6.9, 3.1},
{6.2, 2.2},
{5.8, 2.7},
{6.5, 3.2},
{4.8, 3.0},
{5.5, 3.5},
{4.9, 3.1},
{5.1, 3.8},



In [38]:
# Assemble the C source as a single f-string.
# - The backslash right after the opening quotes suppresses the leading newline.
# - Doubled braces emit literal C braces; single braces interpolate Python
#   values: bias, n_thetas, the test-set shape, the row initializers built in
#   `features`, and the array declaration held in `thetas_str_c`.
# - The doubled backslash in the printf format keeps a literal backslash-n in
#   the generated C instead of inserting a newline into the Python string.
code = f"""\
#include <stdio.h>
float lr_prediction(float *features, float* thetas)
{{
    float res = {bias};
    for (int i = 0; i < {n_thetas}; ++i)
        res += features[i] * thetas[i];
    return res;
}}

int main(int argc, char *arvgv[])
{{
    float features[{to_predict.shape[0]}][{to_predict.shape[1]}] = {{{features}}};
    {thetas_str_c} 

    for (int i = 0; i < {to_predict.shape[0]}; ++i) {{
        printf("%f\\n", lr_prediction(features[i], thetas));
    }}
    
    return 0;
}}
"""

# Show the generated program for inspection before it is written to disk.
print(code)

#include <stdio.h>
float lr_prediction(float *features, float* thetas)
{
    float res = -2.527642847333552;
    for (int i = 0; i < 2; ++i)
        res += features[i] * thetas[i];
    return res;
}

int main(int argc, char *arvgv[])
{
    float features[15][2] = {{6.1, 2.8},
{5.7, 3.8},
{7.7, 2.6},
{6.0, 2.9},
{6.8, 2.8},
{5.4, 3.4},
{5.6, 2.9},
{6.9, 3.1},
{6.2, 2.2},
{5.8, 2.7},
{6.5, 3.2},
{4.8, 3.0},
{5.5, 3.5},
{4.9, 3.1},
{5.1, 3.8},
};
    float thetas[2] = { 1.7857587975297526f, -1.3404875694395608f }; 

    for (int i = 0; i < 15; ++i) {
        printf("%f\n", lr_prediction(features[i], thetas));
    }
    
    return 0;
}



## Generating the C file

In [39]:
# Write the generated program text to the C source file.
with open("linear_regression_iris.c", "w") as c_source:
    c_source.write(code)

## Compilation and checking predictions

In [40]:
# Compile the generated C source with gcc into an executable named `lr`.
!gcc linear_regression_iris.c -o lr

In [45]:
print('Transpiled LR model:')
!./lr

print('*******************')
predicted = lr_model.predict(X_test)
print('Sklearn LR model:')
for y in predicted:
  print(y)

Transpiled LR model:
4.612120
2.557329
7.737432
4.299496
5.862152
2.557796
3.585192
5.638582
5.594989
4.210442
4.790229
2.022537
2.602324
2.067064
1.485874
*******************
Sklearn LR model:
4.6121206231671685
2.5573295347157083
7.737432213102685
4.299495986470237
5.862151781437995
2.5577969232326057
3.585192467458336
5.638581390359104
5.594989044583881
4.2104417408521995
4.790229114403244
2.0225366724905776
2.602324046041624
2.067063795299598
1.4858742561978557
