In [None]:
# Step 1: Mount Google Drive
# The input CSV and the saved model used below are both addressed through
# paths under /content/drive, so Drive has to be mounted first.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Required Libraries
import csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib  # To save the trained model

# Step 3: Load the Clustered File
file_path = '/content/drive/MyDrive/clustered_features.csv'  # Update this path

# Parse the CSV with the standard library only (no pandas). Each row yields one
# numeric feature vector (in the column order below) plus its integer cluster id.
feature_columns = (
    'nesting_depth',
    'num_ids',
    'num_classes',
    'num_important',
    'duplicate_selectors',
)
features = []
clusters = []
with open(file_path, 'r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Feature values are converted first, then the target, row by row.
        features.append([float(row[column]) for column in feature_columns])
        clusters.append(int(row['cluster']))  # The cluster column is the training target

# Step 4: Map Clusters to Labels
# Map cluster numbers to meaningful labels.
# NOTE(review): cluster ids produced by a clustering step are usually arbitrary;
# confirm that 0/1/2 really correspond to these severity levels in the CSV.
label_mapping = {
    0: "Clean",
    1: "Moderate Smells",
    2: "High Smells"
}

# Fail early, and name every offending id, instead of stopping with a bare
# KeyError on the first unmapped cluster halfway through the list.
unknown_clusters = sorted(set(clusters) - set(label_mapping))
if unknown_clusters:
    raise ValueError(
        f"No label defined for cluster id(s) {unknown_clusters}; "
        f"known ids are {sorted(label_mapping)}"
    )
labels = [label_mapping[cluster] for cluster in clusters]

# Step 5: Prepare Features and Labels
X = features  # Features extracted from the file
y = labels    # Labels corresponding to clusters (Clean, Moderate Smells, High Smells)

# Split the data into training (80%) and testing (20%) sets.
# The classes are imbalanced, so stratify on the labels: both splits then keep
# the same class proportions and the small classes cannot end up over- or
# under-represented in the test set by chance.
# Stratified splitting needs at least two samples of every class; otherwise
# train_test_split raises ValueError.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 6: Train a Random Forest Classifier
# A fixed random_state keeps the fitted forest reproducible between runs.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 7: Evaluate the Model
# Score the held-out test split: overall accuracy first, then per-class metrics.
y_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
per_class_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", per_class_report)

# Step 8: Save the Model to Google Drive
# Persist the fitted classifier with joblib; the later inference cells reload
# it from this same path.
model_path = '/content/drive/MyDrive/css_smell_classifier.pkl'  # Update this path
joblib.dump(clf, model_path)
print(f"Model saved to {model_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Accuracy: 0.9583333333333334

Classification Report:
                  precision    recall  f1-score   support

          Clean       1.00      0.94      0.97        72
    High Smells       0.80      1.00      0.89        12
Moderate Smells       0.92      1.00      0.96        12

       accuracy                           0.96        96
      macro avg       0.91      0.98      0.94        96
   weighted avg       0.97      0.96      0.96        96

Model saved to /content/drive/MyDrive/css_smell_classifier.pkl


In [None]:
# Step 1: Mount Google Drive
# The model file and the test stylesheet below are read from /content/drive.
# NOTE(review): presumably repeated here so this cell can be run on its own.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Required Libraries
import joblib
import re

# Step 3: Load the Trained Model
# Rebinds the global `clf` to the classifier stored at model_path.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files from a source you trust.
model_path = '/content/drive/MyDrive/css_smell_classifier.pkl'  # Update path
clf = joblib.load(model_path)
print("Model loaded successfully!")

# Step 4: Feature Extraction for a New CSS File
def extract_features_from_css(file_path):
    """
    Build the five-value feature vector for one stylesheet.

    The file is read as UTF-8 and the features are plain text counts:

    0. nesting_depth       - number of '{' minus number of '}'
    1. num_ids             - number of '#' characters
    2. num_classes         - number of '.' characters
    3. num_important       - number of '!important' occurrences
    4. duplicate_selectors - number of captured selector strings minus the
                             number of distinct ones

    Args:
        file_path: Path of the stylesheet to read.

    Returns:
        A list ``[nesting_depth, num_ids, num_classes, num_important,
        duplicate_selectors]``, or ``None`` if any exception occurred (the
        error is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # NOTE(review): this is the brace imbalance, so it is 0 for any file
        # with balanced braces; confirm the training CSV used the same definition.
        opening, closing = text.count('{'), text.count('}')

        # Everything between braces that is directly followed by '{' is taken
        # as a selector. Captures are compared verbatim (surrounding whitespace
        # included), so only exact text repeats count as duplicates.
        selector_texts = re.findall(r'([^\{\}]+)\s*\{', text)

        # NOTE(review): '#' and '.' are raw character counts, so hex colours
        # and decimal numbers are counted as well.
        return [
            opening - closing,
            text.count('#'),
            text.count('.'),
            text.count('!important'),
            len(selector_texts) - len(set(selector_texts)),
        ]
    except Exception as err:
        # Best effort: report the problem and let the caller skip this file.
        print(f"Error reading or processing file: {err}")
        return None

# Step 5: Test the Model on a New CSS File
test_file_path = '/content/drive/MyDrive/_common.scss'  # Update path to your test CSS file
features = extract_features_from_css(test_file_path)

# The extractor returns None on failure, so handle that branch first.
if not features:
    print("Feature extraction failed. Ensure the test file is valid.")
else:
    # predict expects a 2-D input: wrap the single 5-value sample in a list.
    prediction = clf.predict([features])
    print(f"Predicted Class: {prediction[0]}")  # Output the predicted label


Mounted at /content/drive
Model loaded successfully!
Predicted Class: Moderate Smells


In [None]:
# Step 1: Import Required Libraries
import os
import csv
# Step 2: Function for Batch Prediction
def classify_css_files(folder_path, model_path):
    """
    Classify every ``.css`` file in a folder using the trained model.

    Args:
        folder_path: Directory to scan (top level only) for files whose name
            ends with ``.css``.
        model_path: Path of the joblib-serialized classifier to load.

    Side effects:
        Prints one line per classified file and writes the same rows to
        ``classification_results.csv`` inside ``folder_path``. Files for which
        feature extraction fails are skipped.

    Returns:
        None.
    """
    # Load the trained model.
    # NOTE(review): joblib.load unpickles the file — only load trusted models.
    clf = joblib.load(model_path)
    print("Model loaded successfully!")

    results = []
    # os.listdir returns names in arbitrary order; sort them so the printed
    # output and the CSV are reproducible from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.css'):
            continue
        file_path = os.path.join(folder_path, file_name)
        features = extract_features_from_css(file_path)
        if features:  # None means the extractor already reported an error
            prediction = clf.predict([features])
            results.append((file_name, prediction[0]))

    # Print and save results
    print("\nClassification Results:")
    for file_name, prediction in results:
        print(f"File: {file_name}, Predicted Class: {prediction}")

    # Save results to a CSV. The encoding is explicit so non-ASCII file names
    # are written the same way on every platform.
    results_path = os.path.join(folder_path, 'classification_results.csv')
    with open(results_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['File Name', 'Predicted Class'])
        writer.writerows(results)
    print(f"Results saved to {results_path}")

# Step 3: Run Batch Classification
# The results CSV is written into folder_path itself, next to the stylesheets.
folder_path = '/content/drive/MyDrive/test_css_files/'  # Path to folder with CSS files
model_path = '/content/drive/MyDrive/css_smell_classifier.pkl'  # Path to trained model
classify_css_files(folder_path, model_path)


Model loaded successfully!

Classification Results:
File: fonts.css, Predicted Class: High Smells
File: style.css, Predicted Class: Moderate Smells
File: common.css, Predicted Class: Moderate Smells
File: main.css, Predicted Class: Moderate Smells
File: components.css, Predicted Class: High Smells
Results saved to /content/drive/MyDrive/test_css_files/classification_results.csv


In [None]:
# Step 1: Mount Google Drive
# The model, the test folder and the report path below all live under /content/drive.
# NOTE(review): presumably repeated here so this cell can be run on its own.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Required Libraries
import os
import csv
import re
import joblib

# Step 3: Define Feature Extraction Function
# NOTE(review): a function with the same name and logic is also defined in an
# earlier cell; this definition replaces it once the cell runs.
def extract_features_from_css(file_path):
    """
    Compute the five text-count features for a single stylesheet.

    Args:
        file_path: Path of the stylesheet, read as UTF-8.

    Returns:
        ``[nesting_depth, num_ids, num_classes, num_important,
        duplicate_selectors]`` where the values are, in order: '{' count minus
        '}' count, '#' count, '.' count, '!important' count, and the number of
        repeated selector captures. Returns ``None`` (after printing the
        error) if any exception is raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            stylesheet = source.read()

        # NOTE(review): brace imbalance rather than a true nesting depth — it
        # is 0 whenever braces are balanced. Confirm this matches the training data.
        brace_balance = stylesheet.count('{') - stylesheet.count('}')

        # Raw substring counts; '#' and '.' also match hex colours and decimals.
        hash_count, dot_count, important_count = (
            stylesheet.count(token) for token in ('#', '.', '!important')
        )

        # Count every capture that repeats an earlier one verbatim
        # (whitespace included), i.e. total captures minus distinct captures.
        seen = set()
        repeats = 0
        for selector in re.findall(r'([^\{\}]+)\s*\{', stylesheet):
            if selector in seen:
                repeats += 1
            else:
                seen.add(selector)

        return [brace_balance, hash_count, dot_count, important_count, repeats]
    except Exception as e:
        # Best effort: report the problem and let the caller skip this file.
        print(f"Error processing CSS file: {e}")
        return None

# Step 4: Load the Trained Model
# Rebinds the global `clf` used by the batch loop below.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files from a source you trust.
model_path = '/content/drive/MyDrive/css_smell_classifier.pkl'  # Update this path
clf = joblib.load(model_path)
print("Model loaded successfully!")

# Step 5: Set Folder Path for Test CSS Files
test_folder_path = '/content/drive/MyDrive/test_css_files/'  # Update this path to your folder
report_path = '/content/drive/MyDrive/css_analysis_report.csv'  # Path to save the report

# Check that the path is an existing directory. os.path.exists would also
# accept a regular file, which would then fail later inside os.listdir with a
# less helpful error.
if not os.path.isdir(test_folder_path):
    raise FileNotFoundError(f"The folder '{test_folder_path}' does not exist. Please check the path.")

# Step 6: Batch Process CSS Files
results = []  # One dict per classified file; becomes the rows of the report
summary = {"Clean": 0, "Moderate Smells": 0, "High Smells": 0}  # To summarize predictions

# os.listdir returns names in arbitrary order; sort them so the report rows
# come out in the same order on every run.
for file_name in sorted(os.listdir(test_folder_path)):
    if not file_name.endswith('.css'):
        continue
    file_path = os.path.join(test_folder_path, file_name)
    features = extract_features_from_css(file_path)
    if features:  # None means the extractor already reported an error
        prediction = clf.predict([features])[0]
        results.append({
            "file_name": file_name,
            "nesting_depth": features[0],
            "num_ids": features[1],
            "num_classes": features[2],
            "num_important": features[3],
            "duplicate_selectors": features[4],
            "predicted_class": prediction
        })
        # .get keeps the tally working even if the loaded model emits a label
        # that is not pre-seeded in `summary`, instead of raising KeyError mid-run.
        summary[prediction] = summary.get(prediction, 0) + 1

# Step 7: Print the Summary
# One line per label, in the order the labels were inserted into `summary`.
print("Prediction Summary:")
for label in summary:
    count = summary[label]
    print(f"{label}: {count} files")

# Step 8: Save the Detailed Report
# Column order of the CSV; the names match the keys of each dict in `results`.
fieldnames = ["file_name", "nesting_depth", "num_ids", "num_classes", "num_important", "duplicate_selectors", "predicted_class"]
with open(report_path, 'w', newline='', encoding='utf-8') as report_file:
    writer = csv.DictWriter(report_file, fieldnames=fieldnames)
    writer.writeheader()
    for row in results:
        writer.writerow(row)

print(f"Detailed report saved to: {report_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model loaded successfully!
Prediction Summary:
Clean: 0 files
Moderate Smells: 3 files
High Smells: 2 files
Detailed report saved to: /content/drive/MyDrive/css_analysis_report.csv


In [None]:
from google.colab import files

# Path to the model in Google Drive
model_path = '/content/drive/MyDrive/css_smell_classifier.pkl'

# Copy the model to the Colab workspace (if necessary)
# `!cp` is an IPython shell escape and {model_path} is substituted into the
# command unquoted.
# NOTE(review): this would break for a path containing spaces — quote it or
# copy with Python if the path ever changes.
!cp {model_path} ./css_smell_classifier.pkl

# Download the model
# Sends the local copy made above to the browser as a file download.
files.download('./css_smell_classifier.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>