In [None]:
import pandas as pd
import numpy as np
import re

In [108]:
# Load the lawnmower dataset from CSV and preview the first five rows.
df = pd.read_csv("data/lawnmowers.csv")
df.head(n=5)

Unnamed: 0,NAME,WIDTH,PRICE,POWER,GRASS,AREA,DESCRIPTION
0,Bear CNR865,,15000.0,0.75,70.0,550.0,Средняя ширина скашиваемой полосы на данной га...
1,Wolf MTB712,36.0,36000.0,0.9,35.0,550.0,Средняя ширина скашиваемой полосы на данной га...
2,Nakita IZP916,37.0,25000.0,1.4,,300.0,Средняя ширина скашиваемой полосы на данной га...
3,Wolf HHQ968,38.0,14000.0,1.9,30.0,200.0,Средняя ширина скашиваемой полосы на данной га...
4,Worx BRL669,38.0,14000.0,1.0,55.0,500.0,Средняя ширина скашиваемой полосы на данной га...


In [109]:
# Fill missing WIDTH values for rows whose DESCRIPTION mentions a "narrow" strip.
# The pattern is a case-insensitive match on declined forms of "узкий". Each form is
# listed once, and the group is non-capturing so `str.contains` does not warn about
# match groups.
# NOTE(review): forms such as "узкое" / "узкого" / "узких" are not matched — confirm
# whether that is intended.
pattern = r"(?i)(?:узкий|узкая|узкие|узкой|узкую|узким|узком)"

# `na=False` treats a missing DESCRIPTION as "no match" instead of failing on a
# non-string value; a boolean mask replaces the temporary 0/1 helper column.
is_narrow = df["DESCRIPTION"].str.contains(pattern, na=False)

# The fill value is the median of the known widths that lie in the 30–35 range.
median_width = df.loc[df["WIDTH"].between(30, 35), "WIDTH"].median()
df.loc[df["WIDTH"].isna() & is_narrow, "WIDTH"] = median_width

print(f"Median width: {median_width}")
df[["WIDTH"]].value_counts()

Median width: 32.0


WIDTH
32.0     23
38.0     18
36.0     17
39.0     17
41.0     16
31.0     12
34.0     11
37.0     11
40.0     11
42.0     11
44.0     11
43.0     10
30.0      9
35.0      7
33.0      6
Name: count, dtype: int64

In [110]:
# Fill missing WIDTH values for rows whose DESCRIPTION mentions an "average width".
# Case-insensitive match on either word order; the group is non-capturing so
# `str.contains` does not warn about match groups.
pattern = r"(?i)(?:средняя ширина|ширина средняя)"

# `na=False` treats a missing DESCRIPTION as "no match" instead of failing on a
# non-string value; a boolean mask replaces the temporary 0/1 helper column.
is_average = df["DESCRIPTION"].str.contains(pattern, na=False)

# The fill value is the median of the known widths that lie in the 36–40 range.
median_width = df.loc[df["WIDTH"].between(36, 40), "WIDTH"].median()
df.loc[is_average & df["WIDTH"].isna(), "WIDTH"] = median_width

print(f"Median width: {median_width}")
df[["WIDTH"]].value_counts()

Median width: 38.0


WIDTH
38.0     24
32.0     23
36.0     17
39.0     17
41.0     16
31.0     12
34.0     11
37.0     11
40.0     11
42.0     11
44.0     11
43.0     10
30.0      9
35.0      7
33.0      6
Name: count, dtype: int64

In [111]:
# Fill missing WIDTH values for rows whose DESCRIPTION mentions a "wide" strip.
# The pattern is a case-insensitive match on declined forms of "широкая". Each form is
# listed once, and the group is non-capturing so `str.contains` does not warn about
# match groups.
# NOTE(review): forms such as "широкий" / "широкое" / "широкого" are not matched —
# confirm whether that is intended.
pattern = r"(?i)(?:широкая|широкие|широких|широкой|широкую|широким|широком)"

# `na=False` treats a missing DESCRIPTION as "no match" instead of failing on a
# non-string value; a boolean mask replaces the temporary 0/1 helper column.
is_wide = df["DESCRIPTION"].str.contains(pattern, na=False)

# The fill value is the median of the known widths that lie in the 41–45 range.
median_width = df.loc[df["WIDTH"].between(41, 45), "WIDTH"].median()
df.loc[is_wide & df["WIDTH"].isna(), "WIDTH"] = median_width

print(f"Median width: {median_width}")
df[["WIDTH"]].value_counts()

Median width: 42.0


WIDTH
38.0     24
32.0     23
36.0     17
39.0     17
41.0     16
42.0     15
31.0     12
34.0     11
37.0     11
40.0     11
44.0     11
43.0     10
30.0      9
35.0      7
33.0      6
Name: count, dtype: int64

In [None]:
# Average of the WIDTH column after imputation.
# NOTE: despite its name, `mean_area` holds the mean WIDTH, not the mean AREA.
mean_area = df.loc[:, "WIDTH"].mean()
print(f"Mean width: {mean_area}")

Mean width: 37.145


In [122]:
# Remove PRICE outliers: keep rows whose price lies within 1.5 * IQR of the
# first and third quartiles (bounds inclusive).
Q1, Q3 = (df["PRICE"].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# `between` is inclusive on both ends; rows with a missing PRICE evaluate to False.
df_filtered = df[df["PRICE"].between(lower_bound, upper_bound)]

# Drop every row that still has a missing value in any column.
df_cleaned = df_filtered.dropna()

print(f"Mean price: {df_cleaned['PRICE'].mean()}")
print(df_cleaned.head())

Mean price: 27876.404494382023
          NAME  WIDTH    PRICE  POWER  GRASS   AREA  \
0  Bear CNR865   38.0  15000.0   0.75   70.0  550.0   
1  Wolf MTB712   36.0  36000.0   0.90   35.0  550.0   
3  Wolf HHQ968   38.0  14000.0   1.90   30.0  200.0   
4  Worx BRL669   38.0  14000.0   1.00   55.0  500.0   
5  Worx HZX136   32.0  26000.0   1.80   50.0  600.0   

                                         DESCRIPTION  
0  Средняя ширина скашиваемой полосы на данной га...  
1  Средняя ширина скашиваемой полосы на данной га...  
3  Средняя ширина скашиваемой полосы на данной га...  
4  Средняя ширина скашиваемой полосы на данной га...  
5  Новая газонокосилка имеет узкую ширину скашива...  


In [129]:
def exponential_normalization(x):
    """Normalise values with ``1 - exp(1 - x / min(x))``.

    The smallest value maps to 0 and larger values approach (but never reach) 1.

    Args:
        x: pandas Series or NumPy array of numeric values whose minimum is
            strictly positive.

    Returns:
        The element-wise transformed values, in the same container type as ``x``.

    Raises:
        ValueError: if the minimum of ``x`` is zero or negative. Dividing by such
            a minimum would produce inf/NaN or values outside [0, 1).
    """
    minimum = x.min()
    # A non-positive minimum breaks the x / min ratio the formula relies on,
    # so fail loudly instead of returning silently corrupted scores.
    if minimum <= 0:
        raise ValueError(
            f"exponential_normalization requires a strictly positive minimum, got {minimum}"
        )
    return 1 - np.exp(1 - (x / minimum))

# Work on a copy so the cleaned frame stays untouched, then add one <FEATURE>_NORM
# column per numeric feature using exponential_normalization.
df_norm = df_cleaned.copy()
for feature in ("WIDTH", "PRICE", "POWER", "GRASS", "AREA"):
    df_norm[f"{feature}_NORM"] = exponential_normalization(df_cleaned[feature])

# Look up a single model by filtering on df_norm's own NAME column. The previous
# mask came from the unfiltered `df` and relied on implicit index alignment, and the
# `where(...).dropna()` round-trip would also drop a matching row containing any NaN.
df_norm.loc[df_norm["NAME"] == "Cooper AIS883"].head()


Unnamed: 0,NAME,WIDTH,PRICE,POWER,GRASS,AREA,DESCRIPTION,WIDTH_NORM,PRICE_NORM,POWER_NORM,GRASS_NORM,AREA_NORM
192,Cooper AIS883,43.0,24000.0,1.4,70.0,450.0,Широкая полоса скашивания на данной газонокоси...,0.351656,0.864665,0.57965,0.834701,0.713495


In [None]:
# SCORE is a weighted sum of the normalised features. The price term uses
# (1 - PRICE_NORM), so a lower normalised price contributes more to the score.
# Rows are then ordered from highest to lowest SCORE.
df_norm = df_norm.assign(
    SCORE=lambda frame: (
        3 * frame["WIDTH_NORM"]
        + 7 * (1 - frame["PRICE_NORM"])
        + 8 * frame["POWER_NORM"]
        + 1 * frame["GRASS_NORM"]
        + 1 * frame["AREA_NORM"]
    )
).sort_values(by="SCORE", ascending=False)

# Names of the three best-scoring models, printed as a comma-separated list.
top_lawnmowers = df_norm.loc[:, ["NAME", "SCORE"]].head(3)
print(", ".join(top_lawnmowers["NAME"].tolist()))

Nakita ZKF285, Bear QHM833, Boch TFV888
