In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
np.random.seed(42)
num_samples = 1000
num_features = 5

In [3]:
X = np.random.rand(num_samples, num_features) * 100

In [4]:
true_weights = np.array([50, 30, -10, 5, 2])  
y = X @ true_weights + np.random.normal(scale=10, size=num_samples)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
model_sklearn = LinearRegression()
model_sklearn.fit(X_train, y_train)

In [7]:
y_pred_train = model_sklearn.predict(X_train)
y_pred_test = model_sklearn.predict(X_test)

In [8]:
print("Training MSE (scikit-learn):", mean_squared_error(y_train, y_pred_train))
print("Testing MSE (scikit-learn):", mean_squared_error(y_test, y_pred_test))

Training MSE (scikit-learn): 96.3409531923308
Testing MSE (scikit-learn): 111.42116595532582


In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
X_train_tf = tf.convert_to_tensor(X_train_scaled, dtype=tf.float32)
X_test_tf = tf.convert_to_tensor(X_test_scaled, dtype=tf.float32)
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.float32)
y_test_tf = tf.convert_to_tensor(y_test, dtype=tf.float32)

In [12]:
model_tf = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(X_train_tf.shape[1],), activation='linear')])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model_tf.compile(optimizer='adam', loss='mean_squared_error')

In [14]:
model_tf.fit(X_train_tf, y_train_tf, epochs=100, verbose=1, validation_split=0.2)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 17860580.0000 - val_loss: 16450933.0000
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 18098160.0000 - val_loss: 16450710.0000
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 18226828.0000 - val_loss: 16450496.0000
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16823478.0000 - val_loss: 16450285.0000
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 17781990.0000 - val_loss: 16450062.0000
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 17484876.0000 - val_loss: 16449850.0000
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 17212462.0000 - val_loss: 16449632.0000
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x1a3f7f5f8e0>

In [15]:
loss = model_tf.evaluate(X_test_tf, y_test_tf)
print("Testing Loss (TensorFlow):", loss)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 18358912.0000  
Testing Loss (TensorFlow): 18098544.0


In [16]:
y_pred_test_tf = model_tf.predict(X_test_tf)
y_pred_test_tf = y_pred_test_tf.flatten()

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


In [17]:
mse_tf = mean_squared_error(y_test, y_pred_test_tf)
print("Testing MSE (TensorFlow):", mse_tf)

Testing MSE (TensorFlow): 18098543.440773595
