In [8]:
pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.0-cp310-cp310-win_amd64.whl (11.0 MB)
     ---------------------------------------- 11.0/11.0 MB 1.2 MB/s eta 0:00:00
Collecting joblib>=1.2.0
  Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
     -------------------------------------- 301.8/301.8 kB 1.2 MB/s eta 0:00:00
Collecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Collecting scipy>=1.6.0
  Downloading scipy-1.13.0-cp310-cp310-win_amd64.whl (46.2 MB)
     ---------------------------------------- 46.2/46.2 MB 1.2 MB/s eta 0:00:00
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.0 scipy-1.13.0 threadpoolctl-3.5.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import os
import cv2
import json
import numpy as np
import pandas as pd
from urllib.parse import urlparse
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [10]:
# Load and preprocess the dataset

def parse_url(url):
    parsed_url = urlparse(url)
    domain = parsed_url.netloc
    return f"{domain}.jpg"

def load_data():
    # Implement loading and preprocessing your dataset 
   
    f = open('lighthouse_report.json')
    data = json.load(f)['lighthouseReports']
    f.close

    ratings = pd.DataFrame(data)
    
    ratings['filepath'] = ratings['url'].apply(parse_url)
    ratings.drop('id', axis=1, inplace=True)
        
    images = []
    for i in range(0,len(ratings)):
        filepath = ratings['filepath'][i]
        images.append(cv2.imread('images/{0}'.format(filepath),0))
    
    return np.array(images), np.array(ratings['seo'])

In [28]:
images, ratings = load_data()
images = images / 255.0  # normalize images

print(images.shape)

X_train, X_test, y_train, y_test = train_test_split(images, ratings, test_size=0.2, random_state=42)

print(y_train)


(43, 224, 224)
[ 85.  83.  85.  92.  92.  92.  92. 100. 100. 100.  92.  83.  75.  83.
  83.  92.  92. 100.  73.  77. 100.  85.  92.  73.  83. 100. 100.  92.
  83.  83.  92.  58.  85.  77.]


In [29]:

# Define the model
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(224, 224, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear')  # Output layer for regression
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=200, validation_split=0.2, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 8016.7061 - mae: 89.1231 - val_loss: 1052.3000 - val_mae: 28.6798
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 773ms/step - loss: 904.7519 - mae: 25.6576 - val_loss: 1541.6984 - val_mae: 36.6791
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 743ms/step - loss: 469.2430 - mae: 17.7352 - val_loss: 2613.7205 - val_mae: 49.5833
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 778ms/step - loss: 646.2451 - mae: 21.0767 - val_loss: 2472.1370 - val_mae: 48.1018
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 752ms/step - loss: 646.3618 - mae: 21.9386 - val_loss: 1906.6034 - val_mae: 41.5907
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 765ms/step - loss: 577.4302 - mae: 19.5453 - val_loss: 1999.8314 - val_mae: 42.7665
Epoch 7/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [31]:
# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Test MAE: {test_mae}')

# Predict rating for a new screenshot
def predict_rating(screenshot):
    screenshot = screenshot / 255.0
    screenshot = np.expand_dims(screenshot, axis=0)  # Add batch dimension
    predicted_rating = model.predict(screenshot)
    return predicted_rating[0][0]

# Example usage
new_screenshot = cv2.imread('images/{0}'.format('hydromotor.pl.jpg'),0)  # Load your new screenshot
predicted_rating = predict_rating(new_screenshot)
print(f'Predicted Rating: {predicted_rating}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 2421.6506 - mae: 47.6739
Test MAE: 47.67393493652344
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Predicted Rating: 42.64337921142578
