<a href="https://colab.research.google.com/github/Esandu-Meth-Obadaarachchi/ItempricePrediction/blob/main/PricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import LocalOutlierFactor

In [27]:
data = pd.DataFrame({
    "Type": [
        "Battery", "Battery", "Battery", "Inverter", "Inverter", "Rectifier", "Rectifier", "Solar Panel",
        "Solar Panel", "Generator", "Generator", "Precision AC", "Precision AC", "Battery", "Battery",
        "Inverter", "Inverter", "Rectifier", "Rectifier", "Solar Panel", "Solar Panel", "Generator",
        "Generator", "Precision AC", "Precision AC"
    ],
    "Supplier": [
        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
        "T", "U", "V", "W", "X", "Y"
    ],
    "Item Name": [
        "Battery 120AH", "Battery 90AH", "Battery 150AH", "Inverter 3KW", "Inverter 5KW",
        "Rectifier 48V", "Rectifier 24V", "Solar Panel 200W", "Solar Panel 300W", "Generator 10KVA",
        "Generator 20KVA", "Precision AC 3 Ton", "Precision AC 5 Ton", "Battery 110AH",
        "Battery 100AH", "Inverter 2KW", "Inverter 6KW", "Rectifier 48V High Output",
        "Rectifier 12V", "Solar Panel 250W", "Solar Panel 500W", "Generator 15KVA", "Generator 25KVA",
        "Precision AC 2 Ton", "Precision AC 4 Ton"
    ],
    "Total Price": [
        10000, 8000, 15000, 30000, 50000, 18000, 12000, 25000, 37000, 75000, 120000, 90000, 150000,
        9500, 8500, 28000, 52000, 20000, 10000, 30000, 60000, 80000, 130000, 85000, 140000
    ],
    "Quantity": [
        2, 1, 3, 1, 1, 2, 2, 3, 2, 1, 1, 2, 3, 1, 1, 1, 1, 2, 1, 3, 2, 1, 1, 2, 3
    ],
    "Rate": [
        5000, 8000, 5000, 30000, 50000, 9000, 6000, 8300, 18500, 75000, 120000, 45000, 50000,
        9500, 8500, 28000, 52000, 10000, 10000, 10000, 30000, 80000, 130000, 42500, 46666
    ],
    "Discount": [
        5, 0, 10, 15, 20, 5, 0, 10, 15, 0, 0, 10, 15, 5, 5, 0, 20, 0, 0, 10, 20, 5, 5, 10, 15
    ],
    "Warranty": [
        "2 years", "1 year", "3 years", "2 years", "5 years", "1 year", "1 year", "10 years",
        "15 years", "3 years", "5 years", "10 years", "15 years", "2 years", "2 years", "3 years",
        "5 years", "1 year", "1 year", "12 years", "15 years", "3 years", "5 years", "7 years", "10 years"
    ]
})

print(data)

            Type Supplier                  Item Name  Total Price  Quantity  \
0        Battery        A              Battery 120AH        10000         2   
1        Battery        B               Battery 90AH         8000         1   
2        Battery        C              Battery 150AH        15000         3   
3       Inverter        D               Inverter 3KW        30000         1   
4       Inverter        E               Inverter 5KW        50000         1   
5      Rectifier        F              Rectifier 48V        18000         2   
6      Rectifier        G              Rectifier 24V        12000         2   
7    Solar Panel        H           Solar Panel 200W        25000         3   
8    Solar Panel        I           Solar Panel 300W        37000         2   
9      Generator        J            Generator 10KVA        75000         1   
10     Generator        K            Generator 20KVA       120000         1   
11  Precision AC        L         Precision AC 3 Ton

In [28]:
# Step 1.1: Handle Missing Values
from sklearn.impute import SimpleImputer
import numpy as np

# Fill missing or zero 'Rate' using 'Total Price / Quantity'
def calculate_rate(row):
    if pd.isna(row['Rate']) or row['Rate'] == 0:
        if row['Quantity'] > 0:  # Avoid division by zero
            return row['Total Price'] / row['Quantity']
    return row['Rate']

data['Rate'] = data.apply(calculate_rate, axis=1)

# Fill missing values for 'Rate' and 'Discount' using mean imputation
imputer = SimpleImputer(strategy='mean')
data[['Rate', 'Discount']] = imputer.fit_transform(data[['Rate', 'Discount']])

In [19]:
# Step 1: Normalize Item Names using TF-IDF
vectorizer = TfidfVectorizer()
item_vectors = vectorizer.fit_transform(data['Item Name'])
similarity_matrix = cosine_similarity(item_vectors)
data['Cluster'] = np.argmax(similarity_matrix, axis=1)  # Cluster similar items

In [29]:
# Step 1.3: Adjust for Inflation (Assume inflation index is provided)
# Example inflation adjustment (factor = 1.02 for 2% inflation rate)
inflation_factor = 1.02
data['Inflation Adjusted Price'] = data['Rate'] * inflation_factor

In [30]:
print("Preprocessed Data:")
print(data)

Preprocessed Data:
            Type Supplier                  Item Name  Total Price  Quantity  \
0        Battery        A              Battery 120AH        10000         2   
1        Battery        B               Battery 90AH         8000         1   
2        Battery        C              Battery 150AH        15000         3   
3       Inverter        D               Inverter 3KW        30000         1   
4       Inverter        E               Inverter 5KW        50000         1   
5      Rectifier        F              Rectifier 48V        18000         2   
6      Rectifier        G              Rectifier 24V        12000         2   
7    Solar Panel        H           Solar Panel 200W        25000         3   
8    Solar Panel        I           Solar Panel 300W        37000         2   
9      Generator        J            Generator 10KVA        75000         1   
10     Generator        K            Generator 20KVA       120000         1   
11  Precision AC        L        

In [31]:
# Step 3: Prepare Features for Clustering
data = data.drop(columns=['Quantity', 'Total Price'])
features = data[['Cluster', 'Rate', 'Discount', 'Inflation Adjusted Price']].copy()

# Step 4: Normalize Features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Step 5: Train the KNN Model
knn = LocalOutlierFactor(n_neighbors=5, contamination=0.1)  # Adjust `contamination` as needed
data['Anomaly Score'] = knn.fit_predict(scaled_features)

# Mark anomalies
data['Is Anomaly'] = data['Anomaly Score'] == -1

print("\nData with Anomaly Detection Results:")
print(data)

KeyError: "['Cluster'] not in index"

In [23]:
# Function to Check New Items
def detect_anomaly(new_item, vectorizer, model, scaler, data_features):
    """
    Detects if the new item is an anomaly.
    :param new_item: Dict with keys Type, Supplier, Item Name, Total Price, Quantity, Rate, Discount
    :param vectorizer: TF-IDF Vectorizer for Item Name
    :param model: Trained KNN or LOF model
    :param scaler: Scaler used to normalize features
    :param data_features: Existing scaled features for training
    :return: Boolean indicating anomaly status
    """
    # Vectorize the Item Name and calculate similarity to assign a cluster
    new_item_vector = vectorizer.transform([new_item['Item Name']])
    similarity = cosine_similarity(new_item_vector, item_vectors)
    cluster = np.argmax(similarity)

    # Adjust Total Price for Inflation
    inflation_adjusted_price = new_item['Total Price'] * inflation_factor

    # Prepare Feature Vector
    new_features = [
        cluster,
        new_item['Quantity'],
        new_item['Rate'],
        new_item['Discount'],
        inflation_adjusted_price
    ]

    # Scale Features
    new_features_scaled = scaler.transform([new_features])

    # Predict Anomaly
    anomaly_score = model.fit_predict(np.vstack([data_features, new_features_scaled]))
    return anomaly_score[-1] == -1  # The last item in the prediction is the new item



In [25]:
# Example New Item
new_item = {
    "Type": "Battery",
    "Supplier": "E",
    "Item Name": "Battery 150AH",
    "Total Price": 154000,
    "Quantity": 3,
    "Rate": 5000,
    "Discount": 10
}

is_anomaly = detect_anomaly(new_item, vectorizer, knn, scaler, scaled_features)
print("\nIs the new item an anomaly?", is_anomaly)


Is the new item an anomaly? True


