In [19]:
import pandas as pd
import numpy as np
import json
import xml.etree.ElementTree as ET

In [2]:
manufactured = "Ut�ngy�rtott"
manufactured_correct = "Utángyártott"

In [6]:
# Initialize an empty list to hold the JSON objects
data = []

# Read the JSONL file line by line
with open("output.jsonl", 'r', encoding='utf-8') as f:
    for line in f:
        try:
            # Parse the JSON object and append to the list
            data.append(json.loads(line))
        except ValueError as e:
            print(f"Error decoding line: {line}\nError: {e}")

# Convert the list of JSON objects to a DataFrame
df = pd.DataFrame(data)

df.drop("availability", axis=1, inplace=True)

# Add a new attribute named Originality
df['originality'] = df['product_name'].apply(lambda x: 'manufactured' if manufactured in x else 'original')

# Remove "manufactured" from product_name if it exists and handle extra spaces
df['product_name'] = df['product_name'].apply(
    lambda x: x.replace(manufactured, '').replace('  ', ' ').strip() if manufactured in x else x
)


# Display the first few rows of the dataframe
print(df)

     price            competitor product_name originality
0     5685        onlinetoner.hu   HP N9K06AE    original
1     5690             pcland.hu   HP N9K06AE    original
2     6490               alza.hu   HP N9K06AE    original
3     5590         primatinta.hu   HP N9K06AE    original
4     5685        onlinetoner.hu   HP N9K06AE    original
..     ...                   ...          ...         ...
198  19002  kellekanyagonline.hu   HP T6M15AE    original
199  19032            wincity.hu   HP T6M15AE    original
200  19089                pcx.hu   HP T6M15AE    original
201  19100         totalprint.hu   HP T6M15AE    original
202  19209     homeofficeshop.hu   HP T6M15AE    original

[203 rows x 4 columns]


In [8]:
customer_df = pd.read_json("customer_request.json")

customer_df

Unnamed: 0,name,originality,customer_name,min_price,lower_min
0,Samsung MLT-D116L,manufactured,,200,10
1,Samsung MLT-D116L,original,,200,10
2,HP CZ101AE,original,,500,100
3,Canon CL-541XL Color (BS5226B005AA),original,,13250,100


In [9]:
# Check for mismatched values between product_name and name columns
mismatched_products = set(df["product_name"]) - set(customer_df["name"])

mismatched_products

{'HP N9K06AE', 'HP T6M15AE', 'Utángyártott Samsung MLT-D116L'}

In [10]:
# Create masks to check if names and originality in one DataFrame are in the other
def check_common(row, other_df):
    matches = other_df[(other_df['name'] == row['product_name']) & (other_df['originality'] == row['originality'])]
    return len(matches) > 0

In [11]:
mask_df = df.apply(lambda row: check_common(row, customer_df), axis=1)

In [12]:
#mask_customer_df = customer_df.apply(lambda row: check_common(row, df), axis=1)

# Filter the DataFrames using the masks
common_in_df = df[mask_df]

#print(set(common_in_df["product_name"].tolist()))
common_in_df


Unnamed: 0,price,competitor,product_name,originality


In [13]:
min_values = common_in_df.groupby(['product_name', 'originality']).min()

min_values

Unnamed: 0_level_0,Unnamed: 1_level_0,price,competitor
product_name,originality,Unnamed: 2_level_1,Unnamed: 3_level_1


In [14]:
# Rename 'name' column in customer_df to 'product_name'
customer_df = customer_df.rename(columns={'name': 'product_name'})

# Merge the dataframes
merged_df = pd.merge(min_values, customer_df[['product_name', 'originality', 'min_price', 'lower_min']], on=['product_name', 'originality'])

# Convert 'price' and 'lower_min' to numeric types
merged_df['price'] = pd.to_numeric(merged_df['price'], errors='coerce')
merged_df['lower_min'] = pd.to_numeric(merged_df['lower_min'], errors='coerce')

# Lower the price by 'lower_min' amount
merged_df['lowered_price'] = merged_df['price'] - merged_df['lower_min']

# If 'lowered_price' is less than 'min_price', set 'lowered_price' to 'min_price'
merged_df['lowered_price'] = np.where(merged_df['lowered_price'] < merged_df['min_price'], merged_df['min_price'], merged_df['lowered_price'])

merged_df

Unnamed: 0,product_name,originality,price,competitor,min_price,lower_min,lowered_price


In [20]:


if merged_df.empty:
    print("The DataFrame is empty. Creating a placeholder XML file.")
    # Create the root element
    root = ET.Element("root")

    # Create a tree object from the root element
    tree = ET.ElementTree(root)

    # Write the tree to an XML file
    tree.write("customer_min_prices.xml")
else:
    try:
        merged_df.to_xml('customer_min_prices.xml')
    except Exception as e:
        print(f"An {e} error occurred while converting the DataFrame to XML.")


The DataFrame is empty. Creating a placeholder XML file.
