In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import gym
from gym import spaces
import numpy as np
import random
from collections import defaultdict
import matplotlib.pyplot as plt


In [2]:
# Load the raw car listings from a CSV file in the working directory
# and preview the first rows.
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0,Make,Model,Year,Engine Fuel Type,Engine HP,Engine Cylinders,Transmission Type,Driven_Wheels,Number of Doors,Market Category,Vehicle Size,Vehicle Style,highway MPG,city mpg,Popularity,MSRP
0,BMW,1 Series M,2011,premium unleaded (required),335.0,6.0,MANUAL,rear wheel drive,2.0,"Factory Tuner,Luxury,High-Performance",Compact,Coupe,26,19,3916,46135
1,BMW,1 Series,2011,premium unleaded (required),300.0,6.0,MANUAL,rear wheel drive,2.0,"Luxury,Performance",Compact,Convertible,28,19,3916,40650
2,BMW,1 Series,2011,premium unleaded (required),300.0,6.0,MANUAL,rear wheel drive,2.0,"Luxury,High-Performance",Compact,Coupe,28,20,3916,36350
3,BMW,1 Series,2011,premium unleaded (required),230.0,6.0,MANUAL,rear wheel drive,2.0,"Luxury,Performance",Compact,Coupe,28,18,3916,29450
4,BMW,1 Series,2011,premium unleaded (required),230.0,6.0,MANUAL,rear wheel drive,2.0,Luxury,Compact,Convertible,28,18,3916,34500


In [3]:
# Show column dtypes and non-null counts to spot columns with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11914 entries, 0 to 11913
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Make               11914 non-null  object 
 1   Model              11914 non-null  object 
 2   Year               11914 non-null  int64  
 3   Engine Fuel Type   11911 non-null  object 
 4   Engine HP          11845 non-null  float64
 5   Engine Cylinders   11884 non-null  float64
 6   Transmission Type  11914 non-null  object 
 7   Driven_Wheels      11914 non-null  object 
 8   Number of Doors    11908 non-null  float64
 9   Market Category    8172 non-null   object 
 10  Vehicle Size       11914 non-null  object 
 11  Vehicle Style      11914 non-null  object 
 12  highway MPG        11914 non-null  int64  
 13  city mpg           11914 non-null  int64  
 14  Popularity         11914 non-null  int64  
 15  MSRP               11914 non-null  int64  
dtypes: float64(3), int64(5

In [4]:
# Count the missing values in every column.
print(data.isnull().sum())

Make                    0
Model                   0
Year                    0
Engine Fuel Type        3
Engine HP              69
Engine Cylinders       30
Transmission Type       0
Driven_Wheels           0
Number of Doors         6
Market Category      3742
Vehicle Size            0
Vehicle Style           0
highway MPG             0
city mpg                0
Popularity              0
MSRP                    0
dtype: int64


In [5]:
# Impute missing values: most frequent value for the categorical / discrete
# columns, median for the continuous ones.
data['Engine Fuel Type'] = data['Engine Fuel Type'].fillna(data['Engine Fuel Type'].mode()[0])
data['Engine HP'] = data['Engine HP'].fillna(data['Engine HP'].median())
# NOTE(review): rows with a missing cylinder count receive the median here and
# therefore survive the zero-cylinder filter below. The electric car printed at
# the end of the notebook shows 6.0 cylinders, which looks like this effect --
# confirm whether that is intended.
data['Engine Cylinders'] = data['Engine Cylinders'].fillna(data['Engine Cylinders'].median())
data['Number of Doors'] = data['Number of Doors'].fillna(data['Number of Doors'].mode()[0])

# Replace the model year by the vehicle age relative to a fixed reference year
# (fixed so that re-running the notebook later yields the same ages).
current_year = 2025
data['Vehicle Age'] = current_year - data['Year']
data = data.drop(columns='Year')

# Drop rows that report zero cylinders. The explicit copy makes `data` an
# independent frame instead of a filtered selection of the previous one, so
# later cells can modify it safely.
data = data[data['Engine Cylinders'] != 0].copy()

In [6]:
# Report how many fully duplicated rows exist, then keep only the first
# occurrence of each (reassignment instead of in-place mutation).
print(data.duplicated().sum())
data = data.drop_duplicates()

715


In [7]:
def remove_outliers_iqr(df, columns):
    """Return a copy of ``df`` without rows that are IQR outliers in ``columns``.

    The columns are processed one after another. For each column the quartiles
    are computed on the rows that survived the previous columns, and only rows
    inside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are kept. The order of
    ``columns`` can therefore influence the result. Rows holding NaN in a
    filtered column are dropped as well, because comparisons with NaN are False.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of str
        Names of the numeric columns to filter on.

    Returns
    -------
    pandas.DataFrame
        Filtered copy of ``df`` that keeps the original index labels.

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``df``.
    """
    # Work on the frame that was passed in, not on a notebook-level global.
    df_clean = df.copy()
    for col in columns:
        q1 = df_clean[col].quantile(0.25)
        q3 = df_clean[col].quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        df_clean = df_clean[df_clean[col].between(lower_bound, upper_bound)]
    return df_clean
# Columns to filter on ("kolone" is Croatian for "columns"); the IQR filter
# handles them one after another in this order.
kolone = ['Engine HP','Popularity','Vehicle Age','MSRP']
data = remove_outliers_iqr(data, kolone)

In [8]:
# Show the number of distinct values in each column.
data.nunique()

Make                   37
Model                 668
Engine Fuel Type       10
Engine HP             255
Engine Cylinders        7
Transmission Type       5
Driven_Wheels           4
Number of Doors         3
Market Category        54
Vehicle Size            3
Vehicle Style          16
highway MPG            44
city mpg               47
Popularity             37
MSRP                 4821
Vehicle Age            24
dtype: int64

In [9]:
# Remove the columns that are neither encoded nor scaled further down.
# A single non-mutating drop replaces four in-place calls.
data = data.drop(columns=['Model', 'Number of Doors', 'Market Category', 'Popularity'])

In [10]:
# Columns that are label-encoded to integer codes further down.
categorical_cols = ['Make', 'Engine Fuel Type', 'Transmission Type', 'Driven_Wheels', 'Vehicle Size', 'Vehicle Style']
# Columns that are min-max scaled further down (this includes the price, MSRP).
numerical_cols = ['Engine HP', 'Engine Cylinders', 'Vehicle Age', 'highway MPG', 'city mpg', 'MSRP']

In [11]:
# Keep an unencoded, unscaled snapshot so a chosen row can later be printed
# in human-readable form.
pre_data = data.copy()

In [12]:
# Summary statistics of the numeric columns before encoding and scaling.
pre_data.describe()

Unnamed: 0,Engine HP,Engine Cylinders,highway MPG,city mpg,MSRP,Vehicle Age
count,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0
mean,232.232482,5.349005,27.205018,20.094937,30633.655628,13.630312
std,77.367854,1.391178,7.51363,6.518741,14694.955523,6.273321
min,55.0,3.0,13.0,10.0,2000.0,8.0
25%,170.0,4.0,23.0,16.0,21863.75,9.0
50%,220.0,6.0,26.0,19.0,29740.0,10.0
75%,290.0,6.0,31.0,23.0,39501.25,17.0
max,485.0,12.0,354.0,128.0,71500.0,31.0


In [13]:
# Label encoding
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

In [14]:
# Scaling: min-max scale every numeric column (MinMaxScaler with its default
# settings, so each column is mapped onto its own 0..1 range).
# NOTE(review): the summary statistics printed before encoding show
# highway MPG max = 354 and city mpg max = 128 against 75% quantiles of 31 and
# 23. The IQR filter did not cover the MPG columns, so after scaling most MPG
# values end up close to 0 -- confirm that this is acceptable.
scaler = MinMaxScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

In [15]:
# Summary statistics after encoding and scaling.
data.describe()

Unnamed: 0,Make,Engine Fuel Type,Engine HP,Engine Cylinders,Transmission Type,Driven_Wheels,Vehicle Size,Vehicle Style,highway MPG,city mpg,MSRP,Vehicle Age
count,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0,8848.0
mean,18.013788,8.056623,0.412169,0.261001,1.401786,1.714738,0.994801,8.307301,0.041657,0.08555,0.411995,0.244796
std,11.513657,1.758399,0.179925,0.154575,0.869373,1.049148,0.891526,4.967413,0.022034,0.055244,0.211438,0.272753
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,8.0,0.267442,0.111111,1.0,1.0,0.0,3.0,0.029326,0.050847,0.285809,0.043478
50%,16.0,9.0,0.383721,0.333333,1.0,2.0,1.0,9.0,0.038123,0.076271,0.399137,0.086957
75%,29.25,9.0,0.546512,0.333333,1.0,2.0,2.0,14.0,0.052786,0.110169,0.539586,0.391304
max,36.0,9.0,1.0,1.0,4.0,3.0,2.0,15.0,1.0,1.0,1.0,1.0


In [16]:
class CarSelectionEnv(gym.Env):
    """One-shot environment in which an agent picks a single car from a catalogue.

    Every episode ends with the first valid action: either one car is chosen
    (reward = dot product of that car's features and the preference weights)
    or the extra "stop" action is taken (reward 0). The observation is a binary
    vector with one entry per car; the entry of the chosen car is set to 1.

    The class uses the classic gym call signatures: ``reset()`` returns the
    observation and ``step()`` returns ``(obs, reward, done, info)``.
    """

    # Column positions of Engine HP, city mpg, MSRP and Vehicle Age in the
    # array passed as ``cars``; they depend on the column order of that array.
    DEFAULT_FEATURE_INDICES = (2, 9, 10, 11)
    # Weights used when the caller does not supply any (same order as above).
    DEFAULT_PREFERENCE_WEIGHTS = (0.7, 0.9, 0.1, 0.3)

    def __init__(self, cars, preference_weights=None, max_steps=500, feature_indices=None):
        """
        Set up the catalogue, the preference weights and the gym spaces.

        Parameters
        ----------
        cars : array-like, 2-D
            One row per car, one column per attribute.
        preference_weights : array-like, optional
            One weight per selected feature; defaults to
            ``DEFAULT_PREFERENCE_WEIGHTS``.
        max_steps : int
            Step budget of an episode. Because every valid action already ends
            the episode, the budget only takes effect for values below 1.
        feature_indices : sequence of int, optional
            Columns of ``cars`` that enter the reward; defaults to
            ``DEFAULT_FEATURE_INDICES``.

        Raises
        ------
        ValueError
            If ``cars`` is not 2-D or the number of weights does not match the
            number of selected features.
        """
        super().__init__()

        cars = np.asarray(cars)
        if cars.ndim != 2:
            raise ValueError(f"cars must be a 2-D array, got {cars.ndim} dimension(s)")
        if feature_indices is None:
            feature_indices = self.DEFAULT_FEATURE_INDICES

        self.cars = cars[:, list(feature_indices)]
        self.n_cars = self.cars.shape[0]
        self.max_steps = max_steps
        self.current_step = 0

        if preference_weights is None:
            preference_weights = np.array(self.DEFAULT_PREFERENCE_WEIGHTS)
        self.preference_weights = np.asarray(preference_weights)
        # Fail early instead of at the first reward computation.
        if self.preference_weights.shape != (self.cars.shape[1],):
            raise ValueError(
                f"expected {self.cars.shape[1]} preference weights, "
                f"got shape {self.preference_weights.shape}"
            )

        # Actions: index of one of the n cars, or n itself for "stop".
        self.action_space = spaces.Discrete(self.n_cars + 1)
        # Observations: binary vector flagging which car has been selected.
        self.observation_space = spaces.MultiBinary(self.n_cars)

        self.selected_index = None  # index of the chosen car; None until one is chosen
        self.done = False
        self.obs = np.zeros(self.n_cars, dtype=np.int8)  # current observation (nothing selected)

    def reset(self):
        """
        Put the environment back into its initial state.

        Clears the selection, the ``done`` flag, the observation vector and the
        step counter, and returns a copy of the initial (all-zero) observation.
        """
        self.selected_index = None
        self.done = False
        self.obs.fill(0)
        self.current_step = 0
        return self.obs.copy()

    def _get_obs(self):
        """Return a copy of the observation so callers cannot alter internal state."""
        return self.obs.copy()

    def step(self, action):
        """
        Apply one action and return ``(obs, reward, done, info)``.

        - After the episode has ended, the current observation is returned
          with reward 0 and nothing changes.
        - An action outside ``0..n_cars`` raises ``ValueError`` before it
          counts as a step.
        - If the step budget is exceeded, the episode ends without a selection.
        - Action ``n_cars`` ("stop") ends the episode with reward 0.
        - Any other action selects that car, ends the episode and yields the
          car's reward.
        """
        if self.done:
            return self._get_obs(), 0.0, self.done, {}

        # Reject invalid actions first so they do not use up a step.
        if not (0 <= action <= self.n_cars):
            raise ValueError(f"Neispravna akcija: {action}")

        self.current_step += 1

        if self.current_step > self.max_steps:
            self.done = True
            return self._get_obs(), self._calculate_reward(), self.done, {}

        if action == self.n_cars:
            # "Stop": finish without selecting anything.
            self.done = True
            return self._get_obs(), self._calculate_reward(), self.done, {}

        # A selection always ends the episode, so no car can be selected twice.
        self.selected_index = action
        self.obs[action] = 1
        self.done = True
        return self._get_obs(), self._calculate_reward(), self.done, {}

    def _calculate_reward(self):
        """
        Return the reward of the currently selected car as a plain float.

        The reward is 0.0 when no car is selected; otherwise it is the dot
        product of the car's selected features and the preference weights.
        """
        if self.selected_index is None:
            return 0.0

        car_features = self.cars[self.selected_index]
        return float(np.dot(car_features, self.preference_weights))

In [46]:
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Q-values live in a dictionary keyed by the state (converted to a tuple);
    each entry is a float32 vector with one value per action, created as zeros
    on first access. All randomness comes from ``np.random``, so
    ``np.random.seed`` alone makes the agent's choices reproducible.
    """

    def __init__(self, n_actions, alpha=0.1, gamma=0.95,
                 epsilon=1.0, epsilon_decay=0.999, min_epsilon=0.01):
        """
        Parameters
        ----------
        n_actions : int
            Number of available actions (must be at least 1).
        alpha : float
            Learning rate.
        gamma : float
            Discount factor.
        epsilon : float
            Initial exploration probability.
        epsilon_decay : float
            Factor applied to epsilon at the end of every episode.
        min_epsilon : float
            Lower bound for epsilon.

        Raises
        ------
        ValueError
            If ``n_actions`` is smaller than 1.
        """
        if n_actions < 1:
            raise ValueError(f"n_actions must be at least 1, got {n_actions}")
        self.q_table = defaultdict(lambda: np.zeros(n_actions, dtype=np.float32))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self.n_actions = n_actions

    @staticmethod
    def _state_key(state):
        """Turn a state into a hashable tuple usable as a Q-table key."""
        if isinstance(state, np.ndarray):
            # tolist() yields plain Python numbers, which hash and compare like
            # the NumPy scalars but are much cheaper to build for long states.
            return tuple(state.tolist())
        return tuple(state)

    def get_action(self, state):
        """
        Choose an action with the epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned,
        otherwise the action with the highest Q-value for ``state`` (ties go to
        the lowest index).

        state: current state (e.g. a binary numpy array)
        return: index of the chosen action as a Python int
        """
        if np.random.rand() < self.epsilon:
            # Exploration
            return int(np.random.randint(self.n_actions))
        # Exploitation - best known action according to the Q-table
        return int(np.argmax(self.q_table[self._state_key(state)]))

    def update(self, state, action, reward, next_state, done):
        """
        Apply the Q-learning update to the pair (state, action).

        state: state in which the action was taken
        action: index of the action taken
        reward: reward received
        next_state: state reached afterwards
        done: bool, True when the episode ended with this transition

        Epsilon is decayed here, once per transition flagged with ``done``.
        """
        state_key = self._state_key(state)
        next_state_key = self._state_key(next_state)

        # A terminal transition has no future value.
        max_future_q = 0.0 if done else np.max(self.q_table[next_state_key])
        current_q = self.q_table[state_key][action]

        # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        new_q = current_q + self.alpha * (reward + self.gamma * max_future_q - current_q)
        self.q_table[state_key][action] = new_q

        # Decay epsilon only at the end of an episode.
        if done:
            self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

In [48]:
# --- Reproducibility ---
# Seed both random sources available in this notebook so that a fresh
# "Restart & Run All" reproduces the same training run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# --- Data and environment setup ---
cars_array = data.values.astype(np.float32)  # the preprocessed DataFrame as a numpy array
# Weights in the order Engine HP, city mpg, MSRP, Vehicle Age:
# reward power and (mostly) city mpg, penalise price and age.
preference_weights = np.array([0.1, 1.,-0.2,-0.1])

env = CarSelectionEnv(
    cars_array,
    preference_weights=preference_weights,
    max_steps=1000
)

agent = QLearningAgent(n_actions=env.action_space.n)

n_episodes = 5000
rewards = []

# --- TRAINING ---
for episode in range(1, n_episodes + 1):
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        action = agent.get_action(state)  # epsilon-greedy action choice
        next_state, reward, done, _ = env.step(action)
        agent.update(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    rewards.append(total_reward)

    # Progress report every 100 episodes.
    if episode % 100 == 0:
        print(f"Epizoda {episode} - Nagrada: {total_reward:.3f} - Epsilon: {agent.epsilon:.3f}")

Epizoda 100 - Nagrada: 0.922 - Epsilon: 0.905
Epizoda 200 - Nagrada: 0.020 - Epsilon: 0.819
Epizoda 300 - Nagrada: -0.030 - Epsilon: 0.741
Epizoda 400 - Nagrada: 0.123 - Epsilon: 0.670
Epizoda 500 - Nagrada: -0.028 - Epsilon: 0.606
Epizoda 600 - Nagrada: 0.922 - Epsilon: 0.549
Epizoda 700 - Nagrada: 0.922 - Epsilon: 0.496
Epizoda 800 - Nagrada: -0.045 - Epsilon: 0.449
Epizoda 900 - Nagrada: -0.018 - Epsilon: 0.406
Epizoda 1000 - Nagrada: 0.922 - Epsilon: 0.368
Epizoda 1100 - Nagrada: 0.036 - Epsilon: 0.333
Epizoda 1200 - Nagrada: 0.922 - Epsilon: 0.301
Epizoda 1300 - Nagrada: 0.073 - Epsilon: 0.272
Epizoda 1400 - Nagrada: 0.922 - Epsilon: 0.246
Epizoda 1500 - Nagrada: 0.922 - Epsilon: 0.223
Epizoda 1600 - Nagrada: 0.922 - Epsilon: 0.202
Epizoda 1700 - Nagrada: 0.922 - Epsilon: 0.183
Epizoda 1800 - Nagrada: 0.922 - Epsilon: 0.165
Epizoda 1900 - Nagrada: 0.922 - Epsilon: 0.149
Epizoda 2000 - Nagrada: 0.922 - Epsilon: 0.135
Epizoda 2100 - Nagrada: 0.922 - Epsilon: 0.122
Epizoda 2200 - Nag

In [50]:
# Remember the current epsilon and set it to 0 so the agent acts purely
# greedily during evaluation.
original_epsilon = agent.epsilon
agent.epsilon = 0.0

try:
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state
        total_reward += reward
finally:
    # Restore epsilon even if the evaluation episode fails, so the agent is
    # never left with exploration switched off.
    agent.epsilon = original_epsilon

selected_car_index = env.selected_index

if selected_car_index is not None:
    print("Odabrani auto nakon evaluacije:")
    # pre_data has the same row order as the array given to the environment,
    # so the positional index yields the readable record of the chosen car.
    print(pre_data.iloc[selected_car_index])
else:
    print("Agent nije odabrao nijedan auto.")

print(f"Ukupna nagrada: {total_reward:.3f}")

Odabrani auto nakon evaluacije:
Make                         Chevrolet
Engine Fuel Type              electric
Engine HP                        200.0
Engine Cylinders                   6.0
Transmission Type         DIRECT_DRIVE
Driven_Wheels        front wheel drive
Vehicle Size                   Compact
Vehicle Style            4dr Hatchback
highway MPG                        110
city mpg                           128
MSRP                             40905
Vehicle Age                          8
Name: 1983, dtype: object
Ukupna nagrada: 0.922
