<a href="https://colab.research.google.com/github/Esandu-Meth-Obadaarachchi/ItempricePrediction/blob/main/PricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import copy

import numpy as np
import pandas as pd
from scipy.sparse.csgraph import connected_components
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.impute import SimpleImputer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the data: 25 sample rows covering six equipment types
# (Battery, Inverter, Rectifier, Solar Panel, Generator, Precision AC).
# Every column list must have the same length (one entry per row).
data = pd.DataFrame({
    "Type": [
        "Battery", "Battery", "Battery", "Inverter", "Inverter", "Rectifier", "Rectifier", "Solar Panel",
        "Solar Panel", "Generator", "Generator", "Precision AC", "Precision AC", "Battery", "Battery",
        "Inverter", "Inverter", "Rectifier", "Rectifier", "Solar Panel", "Solar Panel", "Generator",
        "Generator", "Precision AC", "Precision AC"
    ],
    "Supplier": [
        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
        "T", "U", "V", "W", "X", "Y"
    ],
    "Item Name": [
        "Battery 120AH", "Battery 90AH", "Battery 150AH", "Inverter 3KW", "Inverter 5KW",
        "Rectifier 48V", "Rectifier 24V", "Solar Panel 200W", "Solar Panel 300W", "Generator 10KVA",
        "Generator 20KVA", "Precision AC 3 Ton", "Precision AC 5 Ton", "Battery 110AH",
        "Battery 100AH", "Inverter 2KW", "Inverter 6KW", "Rectifier 48V High Output",
        "Rectifier 12V", "Solar Panel 250W", "Solar Panel 500W", "Generator 15KVA", "Generator 25KVA",
        "Precision AC 2 Ton", "Precision AC 4 Ton"
    ],
    "Total Price": [
        10000, 8000, 15000, 30000, 50000, 18000, 12000, 25000, 37000, 75000, 120000, 90000, 150000,
        9500, 8500, 28000, 52000, 20000, 10000, 30000, 60000, 80000, 130000, 85000, 140000
    ],
    "Quantity": [
        2, 1, 3, 1, 1, 2, 2, 3, 2, 1, 1, 2, 3, 1, 1, 1, 1, 2, 1, 3, 2, 1, 1, 2, 3
    ],
    "Rate": [
        5000, 8000, 5000, 30000, 50000, 9000, 6000, 8300, 18500, 75000, 120000, 45000, 50000,
        9500, 8500, 28000, 52000, 10000, 10000, 10000, 30000, 80000, 130000, 42500, 46666
    ],
    "Discount": [
        5, 0, 10, 15, 20, 5, 0, 10, 15, 0, 0, 10, 15, 5, 5, 0, 20, 0, 0, 10, 20, 5, 5, 10, 15
    ],
    "Warranty": [
        "2 years", "1 year", "3 years", "2 years", "5 years", "1 year", "1 year", "10 years",
        "15 years", "3 years", "5 years", "10 years", "15 years", "2 years", "2 years", "3 years",
        "5 years", "1 year", "1 year", "12 years", "15 years", "3 years", "5 years", "7 years", "10 years"
    ]
})

print(data)


In [3]:
# Step 1.1: Handle Missing Values
# Replace any missing numeric entry with the mean of its column.
numeric_columns = ['Total Price', 'Quantity', 'Rate', 'Discount']
imputer = SimpleImputer(strategy='mean')
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

In [4]:
# Step 1.2: Group similar item names using TF-IDF cosine similarity
vectorizer = TfidfVectorizer()
item_vectors = vectorizer.fit_transform(data['Item Name'])
similarity_matrix = cosine_similarity(item_vectors)

# Two names are linked when their cosine similarity reaches this threshold.
# 0.3 is a starting point; raise it for stricter (more, smaller) clusters.
SIMILARITY_THRESHOLD = 0.3

# Every name has similarity 1.0 with itself, so an argmax over each row of
# `similarity_matrix` just returns the row's own index (or the first exact
# duplicate) and never groups anything. Instead, link names whose similarity
# reaches the threshold and give each connected group of names one cluster id.
adjacency = (similarity_matrix >= SIMILARITY_THRESHOLD).astype(int)
_, cluster_labels = connected_components(adjacency, directed=False)
data['Cluster'] = cluster_labels  # Cluster similar items

In [5]:
# Step 1.3: Adjust for Inflation (Assume inflation index is provided)
# The same multiplier is applied to every row; 1.02 is an example value
# standing for a 2% inflation rate.
inflation_factor = 1.02
data['Inflation Adjusted Price'] = data['Total Price'].mul(inflation_factor)

In [6]:
# Show the frame after the preprocessing steps, under a heading line.
print("Preprocessed Data:", data, sep="\n")

Preprocessed Data:
       Type Supplier      Item Name  Total Price  Quantity     Rate  Discount  \
0   Battery        A  Battery 120AH      10000.0       2.0   5000.0       5.0   
1   Battery        B  120AH Battery      10500.0       2.0   5250.0       5.0   
2   Battery        C   Battery 90AH       7000.0       1.0   7000.0       0.0   
3  Inverter        D   Inverter 5KW      50000.0       1.0  50000.0      10.0   

  Warranty  Cluster  Inflation Adjusted Price  
0  2 years        0                   10200.0  
1  2 years        0                   10710.0  
2   1 year        2                    7140.0  
3  5 years        3                   51000.0  


In [7]:
# Assemble the numeric feature matrix used for outlier detection
feature_columns = ['Cluster', 'Quantity', 'Rate', 'Discount', 'Inflation Adjusted Price']
features = data.loc[:, feature_columns].copy()

# Standardize the features so no single column dominates the distances
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Fit a Local Outlier Factor model (neighbour-based outlier detection).
# fit_predict returns a label per row; -1 is treated as an outlier below, so
# 'Anomaly Score' holds labels rather than a continuous score.
knn = LocalOutlierFactor(contamination=0.1, n_neighbors=5)  # Adjust `contamination` as needed
data['Anomaly Score'] = knn.fit_predict(scaled_features)

# Flag the rows labelled -1
data['Is Anomaly'] = data['Anomaly Score'].eq(-1)

print("\nData with Anomaly Detection Results:")
print(data)


Data with Anomaly Detection Results:
       Type Supplier      Item Name  Total Price  Quantity     Rate  Discount  \
0   Battery        A  Battery 120AH      10000.0       2.0   5000.0       5.0   
1   Battery        B  120AH Battery      10500.0       2.0   5250.0       5.0   
2   Battery        C   Battery 90AH       7000.0       1.0   7000.0       0.0   
3  Inverter        D   Inverter 5KW      50000.0       1.0  50000.0      10.0   

  Warranty  Cluster  Inflation Adjusted Price  Anomaly Score  Is Anomaly  
0  2 years        0                   10200.0              1       False  
1  2 years        0                   10710.0              1       False  
2   1 year        2                    7140.0             -1        True  
3  5 years        3                   51000.0              1       False  




In [8]:
# Function to check if a new item is an anomaly
def detect_anomaly(new_item, model, scaler, data_features):
    """Return whether ``new_item`` is flagged as an outlier next to the reference rows.

    The item is scaled with ``scaler``, appended as the last row of
    ``data_features``, and a copy of ``model`` is fitted on the combined
    matrix via ``fit_predict``. The item counts as an anomaly when its label
    is ``-1``.

    Args:
        new_item: One row of raw (unscaled) feature values, in the same column
            order the scaler was fitted on.
        model: Estimator exposing ``fit_predict(X)`` that labels outliers as
            ``-1``. It is deep-copied before fitting, so the caller's instance
            keeps whatever fitted state it had.
        scaler: Fitted transformer exposing ``transform``.
        data_features: Already-scaled 2-D reference matrix.

    Returns:
        bool: ``True`` if the new item is labelled ``-1``, else ``False``.
    """
    # A scaler fitted on a DataFrame records its column names. Rebuild a
    # one-row frame with those names so transform() receives matching input
    # instead of an unnamed list.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        new_item_input = pd.DataFrame([new_item], columns=list(feature_names))
    else:
        new_item_input = [new_item]
    new_item_scaled = np.asarray(scaler.transform(new_item_input))

    # Fit a private copy: calling fit_predict on the caller's model would
    # overwrite its fitted state with one that includes the new item.
    combined = np.vstack([data_features, new_item_scaled])
    labels = copy.deepcopy(model).fit_predict(combined)

    # The new item is the last row of the combined matrix.
    return bool(labels[-1] == -1)

# Example new item, listed in the same order as the model's feature columns
new_item = [
    1,      # Cluster
    2,      # Quantity
    6000,   # Rate
    5,      # Discount
    10200,  # Inflation Adjusted Price
]
is_anomaly = detect_anomaly(
    new_item=new_item,
    model=knn,
    scaler=scaler,
    data_features=scaled_features,
)

print("\nIs the new item an anomaly?", is_anomaly)


Is the new item an anomaly? True


