In [1]:
import pandas as pd
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from scipy import stats

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf

from statsmodels.stats.multicomp import pairwise_tukeyhsd

import re
import nltk


from sklearn.feature_extraction.text import CountVectorizer

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [37]:
def clear():
    os.system('cls')

def ListNoDups(mylist):
    mylist = list(dict.fromkeys(mylist))
    return mylist

class CarData:
    
    missing = -1
    duplicates = -1
    
    price_outliers = -1
    mileage_outliers = -1
    year_outliers = -1
    total_discard = -1
    
    corpus = []
    def __init__(self, path, price_outlier_mt = 200000, mileage_outlier_mt = 400000,
                year_outlier_lt = 1995, engine_outlier_mt = 4000, engine_outlier_lt = 750,
                dependent_variable = 'price',
                categorical_variables = ['engine_type', 'city', 'province'],
                numeric_variables = ['price', 'mileage_km', 'engine_cm3', 'year']):
        
        #define outliers values
        self.price_outlier_mt = price_outlier_mt
        self.mileage_outlier_mt = mileage_outlier_mt
        self.year_outlier_lt =  year_outlier_lt
        self.engine_outlier_mt = engine_outlier_mt
        self.engine_outlier_lt = engine_outlier_lt
        self.dependent_variable = dependent_variable
        

        
        
        #define variable data types
        self.numeric_variables = numeric_variables
        self.categorical_variables = categorical_variables
        
        #read all .csv files from the directory
        self.data = pd.concat(map(pd.read_csv, glob.glob(os.path.join(path, "*.csv"))), sort=False)
        
        #drop the duplicates and save the number of duplicates - many duplicates due to data gathering method
        self.duplicates = len(self.data) - len(self.data.drop_duplicates())
        self.data.drop_duplicates(inplace = True)
        
        #rename columns
        self.data.columns = ['title', 'price', 'sub_title', 'mileage_km', 'year', 'engine_cm3',
                'engine_type', 'city', 'province', 'negotiable']
        
        #drop NaNs and save the number of rows dropped to the missing varaible
        self.missing = self.data['engine_type'].isna().sum()
        self.data.dropna(subset = ['engine_type'], axis = 'index', inplace = True)
        
        self.missing = self.missing + self.data['city'].isna().sum()
        self.data.dropna(subset = ['city'], axis = 'index', inplace = True)
        
        self.missing = self.missing + self.data['engine_cm3'].isna().sum()
        self.data.dropna(subset = ['engine_cm3'], axis = 'index', inplace = True)
        
        #clean up the columns
        self.data['price'] = self.data['price'].apply(lambda x: x.replace(",", ""))
        self.data['price'] = self.data['price'].apply(lambda x: x.replace(" ", "")).astype('int')
        
        self.data['mileage_km'] = self.data['mileage_km'].apply(lambda x: x.replace("km", ""))
        self.data['mileage_km'] = self.data['mileage_km'].apply(lambda x: x.replace(" ", "")).astype('float')
        
        self.data['engine_cm3'] = self.data['engine_cm3'].astype('str')
        self.data['engine_cm3'] = self.data['engine_cm3'].apply(lambda x: x.replace('cm3', ''))
        self.data['engine_cm3'] = self.data['engine_cm3'].apply(lambda x: x.replace(' ','')).astype('int')
        
        self.data['province'] = self.data['province'].astype('str')
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('(',''))
        self.data['province'] = self.data['province'].apply(lambda x: x.replace(')',''))
        
        self.data['sub_title'] = self.data['sub_title'].astype('str') #may change that in the future - possible info loss due to lowercase
        
        
        self.data['title'] = self.data['title'].astype('str') #may change that in the future - possible info loss due to lowercase
        
        self.data['negotiable'] = self.data['negotiable'].astype('str')
        
        #Add ID column
        self.data.insert(loc = 0, column = 'ID', value = range(1, len(self.data)+1))

        #discard outliers and calculate the numbers
        self.total_discard = len(self.data) - len(self.data[(self.data['price'] <= self.price_outlier_mt) &
                                                        (self.data['mileage_km'] <= self.mileage_outlier_mt) &
                                                        (self.data['year'] >= self.year_outlier_lt) &
                                                        (self.data['engine_cm3'] <= self.engine_outlier_mt) &
                                                        (self.data['engine_cm3'] >= self.engine_outlier_lt)])        
        
        self.price_outliers = len(self.data[self.data['price'] > price_outlier_mt])
        self.data = self.data[self.data['price'] <= price_outlier_mt]
        
        self.mileage_outliers = len(self.data[self.data['mileage_km'] > mileage_outlier_mt])
        self.data = self.data[self.data['mileage_km'] <= mileage_outlier_mt]
        
        self.year_outliers = len(self.data[self.data['year'] < year_outlier_lt])
        self.data = self.data[self.data['year'] >= year_outlier_lt]
        
        self.engine_outliers = len(self.data[(self.data['engine_cm3'] > engine_outlier_mt) |
                                            (self.data['engine_cm3'] < engine_outlier_lt)])
        self.data = self.data[(self.data['engine_cm3'] <= engine_outlier_mt) & 
                             (self.data['engine_cm3'] >= engine_outlier_lt)]
        
        #NLP
        self.data['concat_title_subtitle'] = self.data['title'] + ' ' + self.data['sub_title']
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.lower())
        
        #replace problematic cases for NLP for title and subtitle
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('+',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('(',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace(')',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('**',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('*',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace(']',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('[',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace("/"," "))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace("\\"," "))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace(',',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('?',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('.',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('!',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('_',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('-',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('|',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('#',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('%',' '))
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('~',' '))  
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('*',' ')) 
        self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace('*',' '))
        
        #NLP preprocessing for location
        self.data['province'] = self.data['province'].apply(lambda x: x.lower()) 
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ą', 'a')) 
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ę', 'e'))
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ł', 'l'))
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ś', 's'))
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ć', 'c'))
        self.data['province'] = self.data['province'].apply(lambda x: x.replace('ż', 'z'))
        
    def describe(self):
        #descriptive statistice
        desc_stats = round(pd.DataFrame(
                        data = self.data[self.numerical_variables].describe(),
                        columns = self.data[self.numerical_variables].columns),2)
        return desc_stats
    
    def outliers(self):
        #baisc data about outliers discarded during preprocessing
        print('Offers with price greater than '+str(self.price_outlier_mt)+' have been discarded')
        print('The number of such offers = '+str(self.price_outliers))
        print('')
        print('Offers with mileage greater than '+str(self.mileage_outlier_mt)+' have been discarded')
        print('The number of such offers = '+str(self.mileage_outliers))
        print('')
        print('Offers with year lower than '+str(self.year_outlier_lt)+' have been discarded')
        print('The number of such offers = '+str(self.year_outliers))
        print('')
        print('Offers with engine_cm3 greater than '+str(self.engine_outlier_lt)+
              ' and lower than '+str(self.engine_outlier_mt)+' have been discarded')
        print('The number of such offers = '+str(self.engine_outliers))
        print('')
        print('Total number of discarded offers = '+str(self.total_discard)
              +'('+str(round(self.total_discard/len(self.data)*100,2))+'%)'
              +' - may be different to the sum of above due to overlap')
        
    def scatter_nox(self, var = 'all', figsize_1 = 7, figsize_2 = 5):
        #prints scatter plots with no x axis - a dummy sequence as x axis
        if var != 'all' and var not in self.data.columns:
            print('Variable not found in the dataset')
        if var == 'all':
            plt.rcParams["figure.figsize"] = (figsize_1,figsize_2)
            plt.scatter(y = self.data['mileage_km'], x = range(1, len(self.data)+1), s=1)
            plt.title('mileage_km')
            plt.show()

            plt.scatter(y = self.data['price'], x = range(1, len(self.data)+1), s=1)
            plt.title('price')
            plt.show()

            plt.scatter(y = self.data['year'], x = range(1, len(self.data)+1), s=1)
            plt.title('year')
            plt.show()

            plt.scatter(y = self.data['engine_cm3'], x = range(1, len(self.data)+1), s=1)
            plt.title('engine_cm3')
            plt.show()
        else:
            plt.rcParams["figure.figsize"] = (figsize_1,figsize_2)
            plt.scatter(y = self.data[var], x = range(1, len(self.data)+1), s=1)
            plt.title(var)
            plt.show()
            
    def scatter(self, var = 'all'):
        #prints scatter plots for numerical variables
        if var != 'all' and var not in self.data.columns:
            print('Variable not found in the dataset')
        list_comb = []
        if var == 'all':
            for variable1 in enumerate(self.numeric_variables):
                for variable2 in enumerate(self.numeric_variables):
                    if variable1 != variable2 and variable1[1]+variable2[1] not in list_comb and variable2[1]+variable1[1] not in list_comb:
                        plt.scatter(y = self.data[variable1[1]], x = self.data[variable2[1]], s=1)
                        plt.title("Correlation between "+variable1[1]+' and '+variable2[1])
                        plt.ylabel(variable1[1])
                        plt.xlabel(variable2[1])
                        plt.show()
                        list_comb.append(variable1[1]+variable2[1])
        #else: - TO DO
    
    def hist(self, var = 'all', bins = 50):
        if var != 'all' and var not in self.data.columns:
            print('Variable not found in the dataset')
        #prints histograms for numeric variables
        if var == 'all':
            for variable in enumerate(self.numeric_variables):
                plt.hist(x = self.data[variable[1]], bins = bins)
                plt.title(variable[1])
                plt.show()
     
    def price_cat_vars(self, variables = '_NULL_'):
        
        if variables == '_NULL_':
            variables = self.categorical_variables
            
        for variable in enumerate(variables):
            # shows desrptive statistics of categorical variables
            print(self.data.groupby(self.data[variable[1]])['price'].describe())
            #the variables need further preprocessing
            
    def add_dummies(self, categorical_list, columns_to_check, delete_from_strings = 'yes'):
        #adds dummmies from cat_list, checks in every column of columns_to_check
        for column in enumerate(columns_to_check):
            for category in enumerate(categorical_list):
                col_name = column[1] + '_' + category[1]
                self.data[col_name] = self.data[column[1]].str.contains(category[1]).astype('int')
                
                #append newly craeted varaibles to categorical variables
                if self.data[col_name].sum() > 0:
                    self.categorical_variables.append(col_name)
                else:
                    self.data.drop(columns = [col_name], inplace = True)
                
                #delete the string from the column
                if delete_from_strings == 'yes':
                    self.data[column[1]] = self.data[column[1]].apply(lambda x: x.replace(category[1], ''))
                
        return self
    
    def add_dummies2(self, categorical_list, delete_from_strings = 'yes'):
        #adds dummmies from cat_list, checks in concat_title_subtitle column
        for category in enumerate(categorical_list):
            
            #print(category)
            
            col_name = category[1]
            self.data[col_name] = self.data['concat_title_subtitle'].str.contains(category[1]).astype('int')
                
                #append newly craeted varaibles to categorical variables
            if self.data[col_name].sum() > 0:
                self.categorical_variables.append(col_name)
            else:
                self.data.drop(columns = [col_name], inplace = True)
                
                #delete the string from the column
            if delete_from_strings == 'yes':
                self.data['concat_title_subtitle'] = self.data['concat_title_subtitle'].apply(lambda x: x.replace(category[1], ''))
                
        return self
    """
    def ind_test(self, var, alpha = 0.05):
        if alpha > 1 or alpha < 0:
            print('Incorrect alpha value. Select a value from <0;1>.')
            
        if var != 'all' and var not in self.data.columns:
            print('Variable not found in the dataset')
        pivot = round(self.data.pivot_table(values = 'price', index = var, aggfunc = ['count', 'mean']),2)
        pivot.columns = ['count', 'mean']
        
        mean_price = self.data['price'].mean()
        
        pivot['sm'] = pivot['mean']/((pivot['count'])**(1/2))
        
        pivot['t'] = (pivot['mean']-mean_price)/pivot['sm']
        pivot['df'] = pivot['count']-1

        #calculate p-value
        pivot['t_border'] = stats.t.ppf(1-alpha/2, pivot['df'])
        
        #implementation here is not 100% mathematically correct
        return pivot
    """
    def anova(self, var = 'all', alpha = 0.05):
        if var == 'all':
            for variable in enumerate(self.categorical_variables):
                anova_data = self.data[[variable[1], 'price']].reset_index().copy()
                anova_data.columns = ['index', variable[1], 'price']
                equation_string = 'price ~ '+str(variable[1])
                model = ols(equation_string, data=anova_data).fit()
                anova_table = sm.stats.anova_lm(model, typ=2)
                print(anova_table)
                print()
                
                #pairwise comparisons
                pairwise_comparison = pairwise_tukeyhsd(endog = anova_data['price'],
                                                        groups = anova_data[variable[1]],
                                                        alpha = alpha)
                print(pairwise_comparison)
                print()
        #else:
            #TO DO
            #also TO DO check Anova assumptions
    def make_corpus(self):
        #creates a corpus out of title and subtitle column
        for i in range(0, len(self.data)):
            #string = re.sub('[^a-zA-Z]', ' ', self.data.reset_index().loc[i]['concat_title_subtitle'])
            string = self.data.reset_index().loc[i]['concat_title_subtitle']
            string = string.split()
            self.corpus = self.corpus + string
        self.corpus = ListNoDups(self.corpus)
        
        #with open("corpus.txt", "w") as output:
         #   output.write(str(self.corpus))
    
        return self.corpus
    
    def analyse_variables(self, list_of_variables, discard = 0.01):
    # independence tests for a list of variable e.g. corpus
        final_df = pd.DataFrame(columns = ['variable', 'mean_1', 'mean_0', 'count_1', 'count_0'])

        for variable in enumerate(list_of_variables):
            #debug
            print(str(variable)+' done')
            
            self.data[variable[1]] = self.data['concat_title_subtitle'].str.contains(variable[1]).astype('int')

            mean_1 = self.data.loc[self.data[variable[1]] == 1][self.dependent_variable].mean()
            mean_0 = self.data.loc[self.data[variable[1]] == 0][self.dependent_variable].mean()

            count_1 = len(self.data.loc[self.data[variable[1]] == 1])
            count_0 = len(self.data.loc[self.data[variable[1]] == 0])        
            
            if count_1 >= discard * len(self.data) and count_0 >= discard * len(self.data):            
                dict_to_append = {
                    'variable' : variable[1],
                    'mean_1' : mean_1,
                    'mean_0' : mean_0,
                    'count_1' : count_1,
                    'count_0' : count_0
                }

                final_df = final_df.append(dict_to_append, ignore_index = True)

            self.data.drop(columns = [variable[1]], inplace = True)

            #if variable[0] % 1000 == 0:
            #    print(str(variable[0])+'/'+str(len(list_of_variables)))
            
            
        
        
        final_df['mean_diff'] = abs(final_df['mean_1'] - final_df['mean_0'])
        final_df = final_df.sort_values(by = 'mean_diff', ascending = False).reset_index()
        final_df.drop(columns = 'index', inplace = True)
        
        final_df.to_csv('analyse_variables_results.csv')
        
        return final_df

        
            

#cv = CountVectorizer(max_features = 1000)
#X = cv.fit_transform(corpus).toarray()

In [46]:
x = CarData(path = 'data/', dependent_variable = 'price', categorical_variables = ['engine_type', 'city', 'province'],
           numeric_variables = ['price', 'mileage_km', 'engine_cm3', 'year'])

In [6]:
brands = ['alfa', 'audi', 'bmw', 'chevrolet', 'chrysler',
          'citroen', 'dacia', 'daewoo', 'dodge', 'fiat',
          'ford', 'honda', 'hyundai', 'jaguar', 'jeep',
         'kia', 'rover', 'lexus', 'mazda', 'mercedes',
         'mitsubishi', 'nissan', 'opel', 'peugeot',
         'porsche', 'renault', 'seat', 'smart', 'subaru', 
          'suzuki', 'tesla', 'toyota', 'volkswagen',
          'volvo', 'skoda']

In [47]:
x.add_dummies2(brands)

<__main__.CarData at 0x1bd47c4fd08>

In [8]:
corpus = x.make_corpus()

In [9]:
analysis = x.analyse_variables(corpus, discard = 0.01)

(0, 'cx') done
(1, '3') done
(2, '150km') done
(3, '6at') done
(4, '4x4') done
(5, 'skypassion') done
(6, 'biala') done
(7, 'skóra') done
(8, 'safety') done
(9, 'navi') done
(10, 'partner') done
(11, 'opłacony') done
(12, 'dokumentacja') done
(13, 'przebiegu') done
(14, 'sportage') done
(15, 'opłacona') done
(16, 'napęd') done
(17, 'škoda') done
(18, 'roomster') done
(19, 'serwis') done
(20, 'aso') done
(21, 'klimatronic') done
(22, 'grzane') done
(23, 'siedzenia') done
(24, 'combo') done
(25, 'klimatyzacja') done
(26, 'przebieg') done
(27, 'faktura') done
(28, 'polski') done
(29, 'salon') done
(30, 'lancer') done
(31, '1') done
(32, '8') done
(33, 'polska') done
(34, '24000') done
(35, 'seria') done
(36, '320d') done
(37, '5') done
(38, 'e60') done
(39, '535d') done
(40, '272km') done
(41, '2006r') done
(42, 'head') done
(43, 'up') done
(44, 'start') done
(45, 'stop') done
(46, 'xenony') done
(47, 'bogate') done
(48, 'wyposażenie') done
(49, 'grand') done
(50, 'cherokee') done
(51, 'n

(414, 'el') done
(415, 'pamięć') done
(416, '19"') done
(417, 'drzwi') done
(418, 'śliczna') done
(419, '85km') done
(420, 'ładny') done
(421, 'leszno') done
(422, 'c30') done
(423, '6d') done
(424, '2007r') done
(425, 'polskory') done
(426, 'climatronic') done
(427, 'nowe') done
(428, 'czysty') done
(429, 'zadbany') done
(430, 'x1') done
(431, 'xdrive25i') done
(432, '25i') done
(433, '28i') done
(434, '218d') done
(435, 'gran') done
(436, 'advantage') done
(437, 'golf') done
(438, 'vii') done
(439, '0tdi') done
(440, 'comfortline') done
(441, 'po') done
(442, 'wym') done
(443, 'rozrządu') done
(444, 'wł') done
(445, 'q7') done
(446, 'prywatne') done
(447, 'serwisowane') done
(448, 'krajowe') done
(449, 'casa') done
(450, '600') done
(451, 'classic') done
(452, 'seicento') done
(453, 'van') done
(454, 'najnowszy') done
(455, 'model') done
(456, '15800') done
(457, 'fabia') done
(458, 'klimatyzacją') done
(459, 'ważne') done
(460, '143') done
(461, 'biegów') done
(462, 'a4b7') done
(46

(816, 'tysięcy') done
(817, 'temu') done
(818, 'wy') done
(819, '2003') done
(820, '04') done
(821, 'trend') done
(822, 'do') done
(823, 'końca') done
(824, 'silnikiem') done
(825, 'asx') done
(826, '2018r') done
(827, '12') done
(828, 'e83') done
(829, 'gtc') done
(830, 'coupé') done
(831, 'środek') done
(832, 'grip') done
(833, 'control') done
(834, '39tys') done
(835, 'fluence') done
(836, 'fluensce') done
(837, 'pierwsza') done
(838, 'rejestracja') done
(839, 'type') done
(840, '2009') done
(841, 'kraju') done
(842, 'no') done
(843, '370') done
(844, 'nismo') done
(845, 'vitara') done
(846, 'reduktor') done
(847, 'sprawna') done
(848, 'genesis') done
(849, 'ultimate') done
(850, 'najbogatsza') done
(851, 'czujniki') done
(852, 'brembo') done
(853, 's80') done
(854, '5t') done
(855, '4hdi') done
(856, 'captur') done
(857, 'tce') done
(858, '23800') done
(859, 'igła') done
(860, '118ps') done
(861, 'felicia') done
(862, 'felcia') done
(863, 'glx') done
(864, 'salonowa') done
(865, 'i

(1207, 'nowości') done
(1208, 'b7') done
(1209, 'l') done
(1210, 'parktronik') done
(1211, 'chrom') done
(1212, '35') done
(1213, 'megan') done
(1214, '231') done
(1215, 'ecoblue') done
(1216, 'gwarancji') done
(1217, '2004') done
(1218, 'bęzyna') done
(1219, 'romeo') done
(1220, 'mito') done
(1221, 'zafira') done
(1222, 'osobowa') done
(1223, 'bezwypadkowa') done
(1224, 'krajowa') done
(1225, 'właściciela') done
(1226, 'hdi;') done
(1227, 'tanio') done
(1228, 'enjoy') done
(1229, 'cx3') done
(1230, 'kanada') done
(1231, 'unikat') done
(1232, 'tiv') done
(1233, '2020') done
(1234, 'ubezpieczony') done
(1235, 'navara') done
(1236, 'kolor') done
(1237, 'pieprz') done
(1238, 'foteli') done
(1239, 'alufelga') done
(1240, '16') done
(1241, 'orginale') done
(1242, '147') done
(1243, '2001') done
(1244, 'crafter') done
(1245, 'brygadówka') done
(1246, 'off') done
(1247, 'road') done
(1248, '83km') done
(1249, 'clasic') done
(1250, 'ladny') done
(1251, '2kpl') done
(1252, 'kol') done
(1253, 'o

(1585, 'cali') done
(1586, 'panoramiczny') done
(1587, '190km') done
(1588, 'jeżdżący') done
(1589, 'wj') done
(1590, '4turbo') done
(1591, '2011r') done
(1592, 'toyote') done
(1593, 'mustang') done
(1594, 'discovery') done
(1595, '177km') done
(1596, 'ze') done
(1597, 'szwajcarii') done
(1598, 'grandis') done
(1599, 'ka') done
(1600, 'elektryczny') done
(1601, 'w211') done
(1602, '177') done
(1603, '400') done
(1604, 'nm') done
(1605, 'primera') done
(1606, '6multijet') done
(1607, 'długi') done
(1608, 'maxi') done
(1609, '5os') done
(1610, 'przes') done
(1611, 'iwl') done
(1612, 'srebny') done
(1613, '25800') done
(1614, 'sorento') done
(1615, 'slink') done
(1616, 'trail') done
(1617, '8n') done
(1618, '180hp') done
(1619, 'ml') done
(1620, 'ben') done
(1621, 'remoncie') done
(1622, '55kw') done
(1623, 'one') done
(1624, 'salt') done
(1625, '318d') done
(1626, 'sprzęgło') done
(1627, 'pajero') done
(1628, 'did') done
(1629, '8l') done
(1630, 'par') done
(1631, '2014r') done
(1632, '2

(1967, '54') done
(1968, 'miejskie') done
(1969, 'bik') done
(1970, '307cc') done
(1971, 'ti') done
(1972, '129') done
(1973, 'n') done
(1974, 'smax') done
(1975, 'kamper') done
(1976, 'eura') done
(1977, 'mobil') done
(1978, 'eleketryka') done
(1979, 'mły') done
(1980, '124km') done
(1981, 'niskie') done
(1982, 'spalanie') done
(1983, 'mechanicznie') done
(1984, 'ok') done
(1985, 'ładne') done
(1986, 'polestar') done
(1987, '350km') done
(1988, 'mmi') done
(1989, '3g') done
(1990, 'basic') done
(1991, 'cd') done
(1992, 'is350') done
(1993, 'rwd') done
(1994, '123tys') done
(1995, 'dory') done
(1996, 'ignis') done
(1997, 'pojazdu') done
(1998, 'wzór') done
(1999, 'racing') done
(2000, 'bezn') done
(2001, 'tico') done
(2002, 'jest') done
(2003, 'pt') done
(2004, 'bądź') done
(2005, 'motor') done
(2006, 'twinturbo') done
(2007, 'gwint') done
(2008, '19cali') done
(2009, 'serwisokazja') done
(2010, '120i') done
(2011, '08') done
(2012, 'darmowe') done
(2013, 'serwisy') done
(2014, '2023')

(2349, 'dużo') done
(2350, 'zamiany') done
(2351, 'salonie') done
(2352, 'toyoty') done
(2353, '69000km') done
(2354, 'kupiona') done
(2355, 'pakowny') done
(2356, 'stopniowa') done
(2357, 'vat1') done
(2358, 'centralny') done
(2359, 'zamek') done
(2360, '308cc') done
(2361, 'xmod') done
(2362, 'x2') done
(2363, 'miesięcy') done
(2364, 'gg') done
(2365, 'gliwice') done
(2366, 'fiesta1') done
(2367, '82ps') done
(2368, '2028') done
(2369, 'aux') done
(2370, 'xara') done
(2371, 'xt') done
(2372, 'gazbrc') done
(2373, 'top2004r') done
(2374, 'sal') done
(2375, '99733') done
(2376, 'team') done
(2377, 'pilota') done
(2378, 'zia') done
(2379, 'poprawek') done
(2380, 'blacharskich') done
(2381, 'c230') done
(2382, 'sportedition') done
(2383, '204km') done
(2384, 'zavoli') done
(2385, '7p') done
(2386, '300') done
(2387, 'uczciwy') done
(2388, 'ypsilon') done
(2389, '420d') done
(2390, 'grancoupe') done
(2391, 'modernlinesport') done
(2392, 'orginał') done
(2393, '185') done
(2394, 'szkany') 

(2727, 'dociski') done
(2728, 'pneumatyka') done
(2729, 'deskas') done
(2730, 'kóra') done
(2731, 'komforty') done
(2732, '116koni') done
(2733, '5i') done
(2734, 'tdv6') done
(2735, '153tyś') done
(2736, 'mt') done
(2737, '2l') done
(2738, 'opla') done
(2739, 'astrę') done
(2740, '46') done
(2741, '112') done
(2742, '19') done
(2743, 'ręki') done
(2744, '360') done
(2745, 'skyactiv') done
(2746, 'automatik') done
(2747, 'insignię') done
(2748, 'klina') done
(2749, '114') done
(2750, '96') done
(2751, 'bawaria') done
(2752, 'motors') done
(2753, 'kola') done
(2754, 'ipad') done
(2755, '105000') done
(2756, 'maxima') done
(2757, 'se') done
(2758, '183000km') done
(2759, '256km') done
(2760, 'kupienia') done
(2761, 'tygodniu') done
(2762, 'lipca') done
(2763, '52') done
(2764, '212') done
(2765, 'young') done
(2766, 'roczny') done
(2767, 'grudzień') done
(2768, 'sprzaedam') done
(2769, 'led180ps') done
(2770, 'cinquecento') done
(2771, 'młody') done
(2772, 'właści') done
(2773, '7mio') d

(3100, 'fabrycznym') done
(3101, '176066km') done
(3102, 'commonrail') done
(3103, 'czarnysufit') done
(3104, 'clk240') done
(3105, '233') done
(3106, 'r56') done
(3107, '2xsline') done
(3108, 'razy') done
(3109, 'zwinne') done
(3110, 'fuel') done
(3111, 'crossclimat') done
(3112, 'michelin') done
(3113, '0bluehdi') done
(3114, 'eat8') done
(3115, 'focal') done
(3116, 'demo') done
(3117, "''black''") done
(3118, 'kaoitańskie') done
(3119, 'feel') done
(3120, '316ti') done
(3121, 'bluray') done
(3122, 'szampan') done
(3123, 'iveco') done
(3124, 'massif') done
(3125, 'daily') done
(3126, '35c12') done
(3127, 'wywrotka') done
(3128, 'kiper') done
(3129, 'wywrot') done
(3130, 'access') done
(3131, 'xcellence') done
(3132, 'rabatu') done
(3133, 'francja') done
(3134, 'm2014') done
(3135, '2xrline') done
(3136, '30th') done
(3137, 'anniversary') done
(3138, 'alur18') done
(3139, 'jvc') done
(3140, 'podgrzszyba') done
(3141, 'oponyzimowe') done
(3142, 'wagon') done
(3143, '193km') done
(3144,

(3470, 'silniku') done
(3471, 'ciągłym') done
(3472, 'użytku') done
(3473, 'kontaktu') done
(3474, 'skora') done
(3475, '18cali') done
(3476, 'banzyna') done
(3477, '4el') done
(3478, 'gdańsk') done
(3479, 'avf') done
(3480, 'właścicel') done
(3481, 'karl') done
(3482, '0pb') done
(3483, 'alize') done
(3484, 'spptage') done
(3485, '1012') done
(3486, 'brąz') done
(3487, "'14") done
(3488, '176tys') done
(3489, '78000') done
(3490, 'w163') done
(3491, 'diesl') done
(3492, 'pneumatyki') done
(3493, 'trendline') done
(3494, 'stra') done
(3495, 'scaut') done
(3496, 'koszalin') done
(3497, 'pierw') done
(3498, '2xalu') done
(3499, 'qvline') done
(3500, '2xnavi') done
(3501, 'fot') done
(3502, "'15") done
(3503, '173km') done
(3504, 'turbiny') done
(3505, 'idaelny') done
(3506, '19`') done
(3507, '5ecoboost') done
(3508, 'netto:') done
(3509, 'nawi;;bi') done
(3510, 'jezdzi') done
(3511, 'badanie') done
(3512, 'techniczne') done
(3513, 'bezypadkowy') done
(3514, 'oryginalna') done
(3515, 'si

(3844, 'altima') done
(3845, 'okazyjna') done
(3846, '2020rok') done
(3847, '153000') done
(3848, '6el') done
(3849, 'parkowan') done
(3850, 'roland') done
(3851, 'garros') done
(3852, 'żadnych') done
(3853, 'uszkodzeń') done
(3854, 'oplacona') done
(3855, '70tyś') done
(3856, '164km') done
(3857, 'hydractive') done
(3858, '88') done
(3859, 'krk') done
(3860, 'msc') done
(3861, 'leasingi') done
(3862, 'alcatrara') done
(3863, 'gwarncją') done
(3864, 'serwisu') done
(3865, '31') done
(3866, 'tdi120') done
(3867, 'siedzienia') done
(3868, 'start&stop') done
(3869, '9osób') done
(3870, 'bensin') done
(3871, 'warty') done
(3872, 'pionner') done
(3873, 'minimalny') done
(3874, '1szy') done
(3875, 'poniżej') done
(3876, 'wind') done
(3877, '44000przebieg') done
(3878, 'trooper') done
(3879, 'okzja') done
(3880, '13500') done
(3881, 'gulietta') done
(3882, '525xi') done
(3883, '$a4$b7$1') done
(3884, '8t$163km$s') done
(3885, 'line$alcantara&alu$opłacona$okazja') done
(3886, 'tb') done
(3887,

(4213, '306') done
(4214, 'łączna') done
(4215, 'pakietowa') done
(4216, '8iben') done
(4217, 'opłac') done
(4218, 'serwisde') done
(4219, 'zaworowy') done
(4220, 'mulitspace') done
(4221, 'dudki11') done
(4222, 'tüv') done
(4223, 'c8wronki807') done
(4224, 'd1') done
(4225, 'mivec') done
(4226, 'półskora') done
(4227, 'mulitvan') done
(4228, 'vgs') done
(4229, '180d') done
(4230, 'klimatroniksewis') done
(4231, 'bokate') done
(4232, 'wymieniona') done
(4233, 'siena') done
(4234, 'bemowo') done
(4235, 'stylistyczny') done
(4236, 'servisie') done
(4237, 'weresja') done
(4238, 'rexton') done
(4239, '137kw') done
(4240, '166') done
(4241, 'kolekcjonerska') done
(4242, 'pasjonata') done
(4243, 'silver') done
(4244, 'lilac') done
(4245, '66km') done
(4246, '10748') done
(4247, '202tyszadbany') done
(4248, 'match') done
(4249, '4xgrzane') done
(4250, '03r') done
(4251, 'kalos') done
(4252, '6bluehdi') done
(4253, 'asyst') done
(4254, '"autobiography"') done
(4255, 'czas') done
(4256, 'zmianę

(4578, '50km') done
(4579, '69tys') done
(4580, '1wlaściciel') done
(4581, '335') done
(4582, 'autoalarm') done
(4583, '50th') done
(4584, 'faktira') done
(4585, 'sta') done
(4586, 'miesiecy') done
(4587, 'kch7k74') done
(4588, 'krajówka') done
(4589, 'idelany') done
(4590, 'qashai') done
(4591, 'karlik') done
(4592, '181km') done
(4593, 'połskora') done
(4594, 'i4d') done
(4595, 'my19') done
(4596, "dpf'a") done
(4597, '98000km') done
(4598, '117000') done
(4599, 'skrzynią') done
(4600, 'sebring') done
(4601, 'etrans') done
(4602, 'ciesz') done
(4603, 'samochodem') done
(4604, 'zapłać') done
(4605, 'wakacjach') done
(4606, 'właścicie') done
(4607, '96km') done
(4608, 'wymagających') done
(4609, 'klientów') done
(4610, '194') done
(4611, 'quadra') done
(4612, 'drive™') done
(4613, 'boston') done
(4614, 'busines') done
(4615, 'gpf') done
(4616, 'ql') done
(4617, 'connenta') done
(4618, 'świeże') done
(4619, '43000tyś') done
(4620, '642') done
(4621, '241km') done
(4622, 'dachowe') done


(4943, '76tkm') done
(4944, '2xamg') done
(4945, 'nap') done
(4946, 'faktuta') done
(4947, '0ttid') done
(4948, 'japoński') done
(4949, 'dostępna') done
(4950, 'c250') done
(4951, 'atomat') done
(4952, 'zobaczzdomu') done
(4953, 'bezywpadkowy') done
(4954, 'gsm') done
(4955, '137') done
(4956, 'jeżdżąca') done
(4957, '420') done
(4958, '420cdi') done
(4959, '400zł') done
(4960, 'dostępny') done
(4961, 'vl') done
(4962, 'hf') done
(4963, '2tce') done
(4964, 'osobow') done
(4965, '109tyś') done
(4966, '99km') done
(4967, 'n47d20a') done
(4968, 'spacegrau') done
(4969, 'metallic') done
(4970, 'wypadku') done
(4971, 'smiga') done
(4972, 'szt') done
(4973, '6vvt') done
(4974, 'elbląg') done
(4975, '114tyś') done
(4976, '639') done
(4977, 'blacharka') done
(4978, 'zrobienia') done
(4979, 'bitdi') done
(4980, '490') done
(4981, 'nałożona') done
(4982, 'importowany') done
(4983, '194tys') done
(4984, '435d') done
(4985, 'intensive') done
(4986, 'styl') done
(4987, 'różne') done
(4988, 'kolory'

(5300, '108') done
(5301, '"navi"') done
(5302, 'klimatronikiem') done
(5303, 'ubezpiecznie') done
(5304, 'do19') done
(5305, 'nimiec') done
(5306, 'poliftowy') done
(5307, 'tacuma') done
(5308, 'montegoblau') done
(5309, 'mapa190km') done
(5310, '81500') done
(5311, 'duratorq') done
(5312, 'tc') done
(5313, 'dv6') done
(5314, 'moondust') done
(5315, '6d2') done
(5316, 'parctronic') done
(5317, '204ps') done
(5318, '215') done
(5319, '162') done
(5320, 'pandemiczna') done
(5321, "13'125km") done
(5322, 'europejski') done
(5323, 'heanup') done
(5324, 'w638') done
(5325, 'g01') done
(5326, 'e270') done
(5327, 'pojemność') done
(5328, 'afl') done
(5329, 'helly') done
(5330, 'hansen') done
(5331, 'peougot') done
(5332, '103tyś') done
(5333, 'tam') done
(5334, 'golfik') done
(5335, '0d4') done
(5336, 'grafitowy') done
(5337, 'babka') done
(5338, 'stanie:') done
(5339, 'front') done
(5340, '108000km') done
(5341, '2gi') done
(5342, 'suberb') done
(5343, '34500') done
(5344, '200hp') done
(53

(5667, '25k') done
(5668, '158ps') done
(5669, 'dofinansowane') done
(5670, 'austrii') done
(5671, 'mercedec') done
(5672, 'felline') done
(5673, '585') done
(5674, 'oxygo') done
(5675, 'doinwestowanyfabrycznylpg') done
(5676, '540i') done
(5677, 'bam') done
(5678, '151km') done
(5679, '147oookm') done
(5680, 'foccus') done
(5681, 'swiezo') done
(5682, 'tronik') done
(5683, '999zł') done
(5684, 'quada') done
(5685, '4matik') done
(5686, 'oso') done
(5687, 'cztery') done
(5688, 'phedra') done
(5689, 'light') done
(5690, 'sewrwis') done
(5691, 'gener') done
(5692, 'miły') done
(5693, "alu'18") done
(5694, 'harmankordon') done
(5695, 'dużanavi') done
(5696, 'zgrabna') done
(5697, 'bm') done
(5698, '18"alu') done
(5699, 'absolutnie') done
(5700, "alu'17") done
(5701, 'pożarniczy') done
(5702, 'cr170') done
(5703, 'fullserwi') done
(5704, '80ps') done
(5705, 'vel') done
(5706, 'satis') done
(5707, 'spoty') done
(5708, 'injection') done
(5709, '7osob') done
(5710, '2xparktronik') done
(5711,

(6033, 'ser') done
(6034, 'najboggassza') done
(6035, '11tyś') done
(6036, 'gru') done
(6037, 'spalaniem') done
(6038, '"s') done
(6039, '988') done
(6040, 'signature') done
(6041, 'osobawa') done
(6042, 'otomoto') done
(6043, '200d') done
(6044, 'niem') done
(6045, 'bezwypakowy') done
(6046, 'g21') done
(6047, 'welury') done
(6048, '122tyś') done
(6049, '115000km') done
(6050, 'kmp') done
(6051, 'kole') done
(6052, 'łady') done
(6053, '275000') done
(6054, 'rozważę') done
(6055, 'każdą') done
(6056, 'ofertę') done
(6057, 'dzwonić') done
(6058, '136tyśkm') done
(6059, '58tyśkm') done
(6060, 'bezkolizyjna') done
(6061, 'ślicznego') done
(6062, 'wyposażonego') done
(6063, 'spacetourer') done
(6064, 'błędów') done
(6065, 'obdii') done
(6066, '94tkm') done
(6067, 'brazik') done
(6068, '286ps') done
(6069, '150910km') done
(6070, 'captivia') done
(6071, 'złotówki') done
(6072, 'ready') done
(6073, '170tyś') done
(6074, 'oryginał100') done
(6075, 'najtaniej') done
(6076, 'warsztatowa') done


(6396, 'supersprint') done
(6397, 'weba') done
(6398, '2kplalu') done
(6399, 'cylindrów') done
(6400, 'auq') done
(6401, 'xdrive30i') done
(6402, 'powabna') done
(6403, 'wyposażoną') done
(6404, '407km') done
(6405, 't8') done
(6406, 'aktualna') done
(6407, '19alu') done
(6408, '1596') done
(6409, 'rejstr') done
(6410, '0xd') done
(6411, 'poliftowe') done
(6412, 'dopasione') done
(6413, 'napę') done
(6414, 'pamieć') done
(6415, 'fote') done
(6416, '2tid') done
(6417, 'saabinka') done
(6418, '220i') done
(6419, 'startstop') done
(6420, 'elementy') done
(6421, '535i') done
(6422, 'eb') done
(6423, 'dawca') done
(6424, 'srodek') done
(6425, 'karty') done
(6426, 'qx60') done
(6427, 'pokład') done
(6428, 'pancernym') done
(6429, 'ślicznotka') done
(6430, 'aluflegi') done
(6431, 'camp') done
(6432, 'podjazd') done
(6433, 'inwalidzki') done
(6434, '127298km') done
(6435, 'gwaracja') done
(6436, 'złoty') done
(6437, 'guattro') done
(6438, 'marca') done
(6439, 'niska') done
(6440, 'przy') done


(6761, '159tyś') done
(6762, 'salonwe') done
(6763, 'buisness') done
(6764, 'mix') done
(6765, 'model2008') done
(6766, 'mid') done
(6767, 'app') done
(6768, 'soll') done
(6769, '4benzynk') done
(6770, 'niemiec2012r') done
(6771, 'exclusiv') done
(6772, 'puredrive') done
(6773, '1400cm') done
(6774, 'mexicanred') done
(6775, 'model2014') done
(6776, 'dfa') done
(6777, 'model2010') done
(6778, '3miesiące') done
(6779, 'model2013') done
(6780, 'znakomity') done
(6781, 'fx35') done
(6782, 'ozonowany') done
(6783, '167tkm') done
(6784, 'open') done
(6785, '4ivtec') done
(6786, 'białaperła') done
(6787, 'dynamicled') done
(6788, 'laneassist') done
(6789, 'higline') done
(6790, 'value') done
(6791, 'dlugo') done
(6792, 'dyn') done
(6793, 'klimatron2x') done
(6794, 'pełnyserwis') done
(6795, 'technik') done
(6796, 'supre') done
(6797, '430') done
(6798, 'mpacket') done
(6799, '5gt') done
(6800, '85000') done
(6801, 'mzada') done
(6802, 'oryginalnie') done
(6803, 'klimatronicgrzanaszyba') done

(7124, 'cylindry') done
(7125, 'stalówki') done
(7126, 'schwarz') done
(7127, '152tyś') done
(7128, '380') done
(7129, 'rx350') done
(7130, 'movano') done
(7131, 'wielosezonowe') done
(7132, 'duuży') done
(7133, 'by') done
(7134, 'loeb') done
(7135, 'autohold') done
(7136, '62') done
(7137, '66700km') done
(7138, 'tdti') done
(7139, 'fender') done
(7140, 'finansowanie') done
(7141, 'exectuvie') done
(7142, 'fak23') done
(7143, 'powodów') done
(7144, 'że') done
(7145, 'kupić') done
(7146, 'cię') done
(7147, 'zaskoczy') done
(7148, 'adblue') done
(7149, 'dostawą') done
(7150, 'drzw') done
(7151, 'skórabrąz') done
(7152, 'pięny') done
(7153, '1wlasc') done
(7154, 'buisnes') done
(7155, 'bezwypadowy') done
(7156, 'regal') done
(7157, 'buickregal') done
(7158, 'radości') done
(7159, 'stopni') done
(7160, 'awantgarde') done
(7161, '152400') done
(7162, 'volskwagen') done
(7163, '6ben') done
(7164, 'konkret') done
(7165, '3turbo160kmtekna') done
(7166, "kamery360'panorama") done
(7167, 'vtc')

(7476, 'oepl') done
(7477, 'olecko') done
(7478, '3jtd') done
(7479, 'zerejstrowany') done
(7480, 'viaro') done
(7481, 'przesuwane') done
(7482, 'skóratempomat') done
(7483, '167k') done
(7484, 'toutan') done
(7485, '1t3') done
(7486, 'svt') done
(7487, 'pacyfic') done
(7488, '2vti') done
(7489, 'zadań') done
(7490, 'specjalnych') done
(7491, 'mała') done
(7492, 'wielkie') done
(7493, 'środku') done
(7494, 'cr140km') done
(7495, 'nawiew') done
(7496, 'prezencja') done
(7497, 'dsg6') done
(7498, 'prywatn') done
(7499, 'nestor') done
(7500, 'baron') done
(7501, 'retro') done
(7502, 'ślubu') done
(7503, 'najbogatasza') done
(7504, 'najem') done
(7505, 'długoter') done
(7506, 'bleck') done
(7507, 'petrol') done
(7508, 'przebieg118tyś') done
(7509, 'parktonik') done
(7510, '17"zarejestrowana') done
(7511, 'alex') done
(7512, 'zarwjestrowane') done
(7513, 'sztywna') done
(7514, 'lutego') done
(7515, '2025') done
(7516, 'przywieziemy') done
(7517, 'niemalowany') done
(7518, 't28') done
(7519,

(7839, 'zegar') done
(7840, 'sterownik') done
(7841, 'sdrive16d') done
(7842, '116tyskm') done
(7843, 'zajejestrowana') done
(7844, 'rubin') done
(7845, 'mokkax') done
(7846, 'x250') done
(7847, 'cougar') done
(7848, 'włascicieli') done
(7849, '329km') done
(7850, 'landrynkowa') done
(7851, 'e280') done
(7852, '148ttk') done
(7853, '38tys') done
(7854, 'renaul') done
(7855, '147tys') done
(7856, 'benzyma') done
(7857, 'prince') done
(7858, 'dubai') done
(7859, 'freeze') done
(7860, 'rodzynek') done
(7861, 'litrowy') done
(7862, 'kubełkowe') done
(7863, 'kś') done
(7864, 'pro2') done
(7865, 'maxa') done
(7866, 'parkow') done
(7867, '44870') done
(7868, 'tcdi') done
(7869, 'gezet') done
(7870, 'mascott') done
(7871, 'plandeka') done
(7872, 'blizniacze') done
(7873, '60c16') done
(7874, 'palet') done
(7875, 'austia') done
(7876, 'rejestacji') done
(7877, 'kleks') done
(7878, '130koni') done
(7879, 'vwtouran') done
(7880, 'stuka') done
(7881, 'stepweay') done
(7882, 'sdn') done
(7883, 'kar

(8207, 'cirtoen') done
(8208, 'megabenzyna203pskołalatozimaautomat') done
(8209, 'uszkodz') done
(8210, '48000') done
(8211, 'swojej') done
(8212, 'grandland') done
(8213, 'linequattro') done
(8214, 'fx3') done
(8215, 'koronie') done
(8216, 'kapitańskie') done
(8217, 'gh') done
(8218, 'holandii') done
(8219, 'klimat') done
(8220, 'brilliance') done
(8221, 'bs4') done
(8222, '63tys') done
(8223, 'endeavor') done
(8224, 'renuault') done
(8225, '92tys') done
(8226, 'bezwzpadkowy') done
(8227, 'gablota') done
(8228, '2500') done
(8229, 'b2500') done
(8230, 'kabiny') done
(8231, 'chłodnice') done
(8232, 'całe') done
(8233, 'optimum') done
(8234, '2017idealny') done
(8235, '4xszyby') done
(8236, 'passem') done
(8237, 'zniemiec1wł') done
(8238, 'recaroxenonnavi') done
(8239, 'blacharski') done
(8240, 'model2018') done
(8241, 'monitory') done
(8242, '158tys') done
(8243, 'cale') done
(8244, '6g') done
(8245, 'ej9') done
(8246, 'piewsz') done
(8247, '52tyś') done
(8248, "'09") done
(8249, 'same

(8561, 'karta') done
(8562, '0cd') done
(8563, 'kllima') done
(8564, 'aeromanual') done
(8565, '502') done
(8566, 'klimaautomatyczna') done
(8567, '133tyś') done
(8568, 'bijak') done
(8569, '"park') done
(8570, 'lane"') done
(8571, 'side') done
(8572, 'symetrical') done
(8573, 'fuled') done
(8574, '166ps') done
(8575, 'dealerplichta') done
(8576, 'prakowania') done
(8577, 'bonus') done
(8578, 'akcesoryjny') done
(8579, 'skybusiness') done
(8580, 'elte') done
(8581, 'rabatem') done
(8582, '63tyskm') done
(8583, 'sensors') done
(8584, 'tline') done
(8585, 'assistance') done
(8586, 'faktur') done
(8587, 'mi') done
(8588, 'służy') done
(8589, 'dw5m625') done
(8590, 'stargaizzing') done
(8591, '100hp') done
(8592, 'szybka') done
(8593, 'zwinna') done
(8594, 'martwypunkt') done
(8595, '21900zł') done
(8596, 'opcją') done
(8597, 'wykupu') done
(8598, '576') done
(8599, 'serwisami') done
(8600, 'sewris') done
(8601, 'zwykłe') done
(8602, '34500tys') done
(8603, 'sobie') done
(8604, 'równych') 

(8923, 'maaaax') done
(8924, 'łódż') done
(8925, 'pełneaso') done
(8926, 'remontu') done
(8927, 'mieście') done
(8928, 'e85') done
(8929, 'm52b28') done
(8930, 'pamieci') done
(8931, 'łopatkami') done
(8932, 'kup') done
(8933, 'kliamatronic') done
(8934, "'piękna'") done
(8935, '344') done
(8936, 'właścieiel') done
(8937, '239') done
(8938, 'ray') done
(8939, 'słuchawek') done
(8940, 'carat') done
(8941, 'klimatyzacjia') done
(8942, '125tyś') done
(8943, 'msport') done
(8944, 'spec') done
(8945, 'ecole') done
(8946, 'nauka') done
(8947, 'auris1') done
(8948, 'moich') done
(8949, 'wiązka') done
(8950, 'kabli') done
(8951, 'zapłonem') done
(8952, 'odpal') done
(8953, 'łatwa') done
(8954, 'nego') done
(8955, 'kość') done
(8956, 'słoniowa') done
(8957, 'tra') done
(8958, 'ekonomiczną') done
(8959, 'titaniun') done
(8960, 'avj') done
(8961, '146000km') done
(8962, 'xen0n') done
(8963, '109000') done
(8964, 'vintage') done
(8965, 'joanna') done
(8966, 'brodzik') done
(8967, '14okm') done
(89

(9283, 'reklame') done
(9284, 'dream') done
(9285, 'rozpoznawanie') done
(9286, '390') done
(9287, 'bezwypadkoy') done
(9288, '32900') done
(9289, '159tys') done
(9290, '101tys') done
(9291, '1200') done
(9292, 'bluhdi') done
(9293, '61tys') done
(9294, '2v') done
(9295, 'gmc') done
(9296, 'acadia') done
(9297, '288km') done
(9298, '109tys') done
(9299, 'miesięcznie') done
(9300, 'limusine') done
(9301, '3000kg') done
(9302, '1wlascicel') done
(9303, 'seri') done
(9304, '245tys') done
(9305, 'raz') done
(9306, '5vvt') done
(9307, '2lcosmo') done
(9308, 'dealerpolska') done
(9309, 'mr`08') done
(9310, '350ml') done
(9311, 'vitarę') done
(9312, 'baja') done
(9313, '972') done
(9314, 'multiplę') done
(9315, '15zł') done
(9316, 'slask') done
(9317, 'z20net') done
(9318, '2pt') done
(9319, 'bewypadkowe') done
(9320, 'uks') done
(9321, 'bezkluczykowy') done
(9322, 'kujawsko') done
(9323, 'pomorskie') done
(9324, 'lessgo') done
(9325, 'c63amg') done
(9326, 'nightvision') done
(9327, 'homelink

(9643, 'orygina') done
(9644, 'benzynie') done
(9645, 'yahoo') done
(9646, '332900km') done
(9647, 'america') done
(9648, 'og') done
(9649, '37tys') done
(9650, 'czakram') done
(9651, '9950zl') done
(9652, '"dsg"') done
(9653, 'otwierane') done
(9654, '81km') done
(9655, 'zapala') done
(9656, 'hydroaktive') done
(9657, 'pley') done
(9658, 'karbonowe') done
(9659, 'wykończenia') done
(9660, 'tunning') done
(9661, 'exterieur') done
(9662, 'trzydziesci') done
(9663, 'tysiace') done
(9664, '184hp') done
(9665, "'17") done
(9666, 'rocks') done
(9667, '5300km') done
(9668, 'tivoli') done
(9669, 'ssaphire') done
(9670, '158500') done
(9671, 'crusier') done
(9672, 'kapitalnym') done
(9673, 'fenomenalna') done
(9674, 'całkowicie') done
(9675, '0benzyna152km') done
(9676, 'ani') done
(9677, 'szpachli') done
(9678, '4xklimatronik') done
(9679, 'qv') done
(9680, 'competizione') done
(9681, '300cdi') done
(9682, '170l') done
(9683, 'łożko') done
(9684, 'cvvl') done
(9685, 'spiritxl') done
(9686, '1

(9998, 'corsika') done
(9999, 'martwepole') done
(10000, 'oplaco') done
(10001, 'sprzedac') done
(10002, '"nardi') done
(10003, 'torino"') done
(10004, '87ps') done
(10005, '24tyś') done
(10006, 'przepięknystan') done
(10007, 'a9g') done
(10008, 'skaypassion') done
(10009, 'bsedan') done
(10010, 'htb') done
(10011, 'słupsk') done
(10012, '6"') done
(10013, 'najtańszy') done
(10014, 'mechaniczny') done
(10015, 'dostep') done
(10016, 'pawelec') done
(10017, 'clk230') done
(10018, '6100km') done
(10019, '6cdi') done
(10020, 'bagażnikiem') done
(10021, '95tyskm') done
(10022, '130i') done
(10023, '4tb') done
(10024, 'przeszklony') done
(10025, '110000km') done
(10026, 'win') done
(10027, '8100km') done
(10028, 'dokumentacją') done
(10029, '6diesel120km') done
(10030, 'najmu') done
(10031, 'dopłata') done
(10032, 'pożyczka') done
(10033, 'złote') done
(10034, '92ps') done
(10035, '4796') done
(10036, 'zwykła') done
(10037, 'utrzymaniu') done
(10038, 'hydropneumatyka') done
(10039, 'beżowy')

(10337, '8e') done
(10338, '18’’') done
(10339, 'a220') done
(10340, 'e240') done
(10341, '4x4cosmo') done
(10342, 'pooplatach') done
(10343, 'chryzler') done
(10344, '206000') done
(10345, 'fire') done
(10346, '65tyś') done
(10347, 'lsd') done
(10348, 'quaife') done
(10349, '2ictd176') done
(10350, '6200') done
(10351, '315') done
(10352, 'jednym') done
(10353, 'bezwyadek') done
(10354, 'klmtr') done
(10355, 'bardo') done
(10356, '11500') done
(10357, 'misubishi') done
(10358, 'sandera') done
(10359, 'ovtavia') done
(10360, 'srt4') done
(10361, 'wyprawówka') done
(10362, 'drogi') done
(10363, 'kruk') done
(10364, 'zadabany') done
(10365, 'brzesko') done
(10366, '227000') done
(10367, 'marea') done
(10368, 'poliftingowa') done
(10369, '2999zł') done
(10370, 'astr') done
(10371, 'atywtemp') done
(10372, 'voc') done
(10373, 'rewelacyjnystan') done
(10374, 'youtube') done
(10375, 'mnual') done
(10376, 'luxry') done
(10377, 'pier') done
(10378, 'wercsja') done
(10379, 'klasę') done
(10380,

(10681, 'naszej') done
(10682, 'sprowadzony1') done
(10683, 'bezwypadowa') done
(10684, 'optymalizacja') done
(10685, '173517') done
(10686, 'rej:256zł') done
(10687, 'a52') done
(10688, 'chabrowy') done
(10689, 'prawdziwe') done
(10690, '84500km') done
(10691, 'sprzedamc3') done
(10692, 'felgami') done
(10693, 'iealny') done
(10694, '370nm') done
(10695, 'własciel') done
(10696, '4x4automat') done
(10697, 'tuv04') done
(10698, 'legnica') done
(10699, 'lodowka') done
(10700, 'przeprawowy') done
(10701, 'santana') done
(10702, 'ps10') done
(10703, 'arb') done
(10704, 'ramą') done
(10705, '119000') done
(10706, 'dsg;') done
(10707, 'style;') done
(10708, '48tys') done
(10709, 'km;led;7') done
(10710, 'osob;vat23') done
(10711, 'rewelacyjnym') done
(10712, 'euroline') done
(10713, '56000') done
(10714, 'benzena') done
(10715, 'toop') done
(10716, 'fort') done
(10717, 'heico') done
(10718, 'najpiękniejsze') done
(10719, '232tkm') done
(10720, 'prologic') done
(10721, '226km') done
(10722, 

(11023, 'foteliki') done
(11024, 'kluczykowe') done
(11025, 'odpalanie') done
(11026, '144000km') done
(11027, 'pinifarina') done
(11028, 'aktu') done
(11029, '6cd') done
(11030, 'we126ml') done
(11031, '81kw') done
(11032, 'mixx') done
(11033, '19rs') done
(11034, 'autotrade') done
(11035, 'radom') done
(11036, "stow'n") done
(11037, 'aventura') done
(11038, '^') done
(11039, 'motomy') done
(11040, 'demonostracyjne') done
(11041, 'supercharged') done
(11042, '174ps') done
(11043, 'kół:') done
(11044, '15’') done
(11045, 'magnaflow') done
(11046, '2xmem') done
(11047, '43tyś') done
(11048, '0lpg') done
(11049, 'wlascicie') done
(11050, '138tkm') done
(11051, '1włunikat') done
(11052, '750d') done
(11053, "2013'") done
(11054, 'zarekestrowany') done
(11055, 'widocznej') done
(11056, 'małygwarantowanyprzebieg') done
(11057, 'gwarancja12miesięcy') done
(11058, 'klaca') done
(11059, '245zł') done
(11060, '218da') done
(11061, 'fullledy') done
(11062, 'poslka') done
(11063, '535da') done
(1

(11366, 'mulifunkcja') done
(11367, 'dot') done
(11368, 'parkowani') done
(11369, 'aż') done
(11370, 'multifunkcjia') done
(11371, 'przejęcia') done
(11372, 'odświeżoną') done
(11373, 'fabryczn') done
(11374, 'driveprofil') done
(11375, '"okazja"') done
(11376, 'intelllink') done
(11377, 'dw8c522') done
(11378, 'bezyana') done
(11379, 'nisana') done
(11380, 'batman') done
(11381, '70000') done
(11382, '12mies') done
(11383, '322i') done
(11384, 'ciężarowyn1') done
(11385, 'group') done
(11386, 'kliamtronic') done
(11387, 'drodze') done
(11388, 'satan') done
(11389, '1199') done
(11390, 'ekrany') done
(11391, '25"') done
(11392, 'zgnity') done
(11393, 'cuna') done
(11394, '145tyś') done
(11395, 'dw1w595') done
(11396, 'wx84607') done
(11397, 'po8ac31') done
(11398, 'llft') done
(11399, 'sg1973u') done
(11400, 'cel') done
(11401, 'sk') done
(11402, '420nm') done
(11403, 'ruda') done
(11404, 'societe') done
(11405, 'nettovat') done
(11406, '605') done
(11407, 'nowość') done
(11408, 'wd967

(11705, 'tom') done
(11706, '6io') done
(11707, 'om642') done
(11708, '190koni') done
(11709, 'kredytgwarancja') done
(11710, 'krótki') done
(11711, 'tturbo') done
(11712, 'blueray') done
(11713, 'langer') done
(11714, 'dw') done
(11715, 'itc') done
(11716, 'sst') done
(11717, 'linie') done
(11718, '5cylindrów') done
(11719, 'mr2015') done
(11720, '150oookm') done
(11721, 'hlx') done
(11722, 'alahambra') done
(11723, 'cyfr') done
(11724, '1rej2016') done
(11725, 'automat8biegów') done
(11726, 'io') done
(11727, '4xgrzanie') done
(11728, 'gwarancjaselekt') done
(11729, 'możłiwa') done
(11730, '125tyśkm') done
(11731, 'logo') done
(11732, "'10") done
(11733, 'komfortp') done
(11734, 'akiet') done
(11735, 'ty') done
(11736, 'carawela') done
(11737, 'nisk') done
(11738, '0tddi') done
(11739, 'pcja') done
(11740, 'fry') done
(11741, 'cofanie') done
(11742, 'adnanced') done
(11743, 'subary') done
(11744, 'potwirdzony') done
(11745, 'kameraconverselklapafullledypanoramadach') done
(11746, 'mf

(12047, '242') done
(12048, 'san') done
(12049, '170kw') done
(12050, '4200zł') done
(12051, '16v;') done
(12052, 'c63') done
(12053, '298tys') done
(12054, 'duzanavi') done
(12055, '19”') done
(12056, 'odsuwane') done
(12057, 'zarezerwowane') done
(12058, '0crdi184ps') done
(12059, 'parter') done
(12060, 'xkr') done
(12061, 'alufegli') done
(12062, 'golf4') done
(12063, '4motions') done
(12064, 'panorama2x') done
(12065, 'alhambrę') done
(12066, 'ils2') done
(12067, 'x308') done
(12068, 'hybryda;') done
(12069, '2018r;') done
(12070, 'tysk') done
(12071, 'klima;led') done
(12072, 'berligo') done
(12073, 'scenic2') done
(12074, '75koni') done
(12075, 'szkód') done
(12076, 'atra') done
(12077, '220koni') done
(12078, '3ben') done
(12079, 'keykess') done
(12080, 'gwaranja') done
(12081, '17;;') done
(12082, 'lineasis') done
(12083, 'sportive') done
(12084, 'remus') done
(12085, 'rasowy') done
(12086, 'nihgt&day') done
(12087, 'six') done
(12088, '97tkm') done
(12089, 'od1') done
(12090, 

(12392, 'sportiv') done
(12393, "my'18") done
(12394, 'boleno') done
(12395, 'nowością') done
(12396, '1298cm') done
(12397, 'renulat') done
(12398, '46400') done
(12399, 'zaworowa') done
(12400, 'climat') done
(12401, 'baaardzo') done
(12402, 'philips') done
(12403, 'fullll') done
(12404, 'około') done
(12405, '380nm') done
(12406, 'mobilo') done
(12407, 'vipgwarant') done
(12408, '1240cm3') done
(12409, 'flexi') done
(12410, 'cleandiesel') done
(12411, '191km') done
(12412, '2di') done
(12413, 'b170') done
(12414, '271000') done
(12415, 'dlugi') done
(12416, '18500') done
(12417, '3xm') done
(12418, 'dowod') done
(12419, 'prezentuje') done
(12420, 'pilotem') done
(12421, '89tyskm') done
(12422, 'edit') done
(12423, 'impuls') done
(12424, 'pełenserwis') done
(12425, 'stag300') done
(12426, 'cmr') done
(12427, '25cm3') done
(12428, 'zbliżeniowy') done
(12429, 'rej2018') done
(12430, 'kutryny') done
(12431, 'fo') done
(12432, 'superwagen') done
(12433, 'kompety') done
(12434, 'zamina') 

(12736, '35"') done
(12737, 'lovells') done
(12738, 'rev') done
(12739, '303ps') done
(12740, 'xz') done
(12741, 'z350') done
(12742, 'z370') done
(12743, '999cm') done
(12744, 'bezwypadkowt') done
(12745, 'rossa') done
(12746, '2030r') done
(12747, 'pasaat') done
(12748, '125oootkm') done
(12749, 'kolekcjonerskim') done
(12750, '19800km') done
(12751, 'pelni') done
(12752, '"x') done
(12753, 'gear') done
(12754, 'delica') done
(12755, '33;') done
(12756, 'wd8806e') done
(12757, 'gd269jf') done
(12758, 'lapmy') done
(12759, '1włas') done
(12760, 'montegoblue') done
(12761, '201000') done
(12762, '3szt') done
(12763, 'sezony') done
(12764, '35400') done
(12765, 'neony') done
(12766, '81600km') done
(12767, 'ga355as') done
(12768, 'cb284hp') done
(12769, 'intenseplus') done
(12770, 'premiumo') done
(12771, '0ide') done
(12772, 'przyjedz') done
(12773, "alu17'") done
(12774, 'moro') done
(12775, '4xes') done
(12776, '154tyś') done
(12777, 'włas') done
(12778, 'felicja') done
(12779, '::za

(13081, 'ltd') done
(13082, '4xclimatronik') done
(13083, 'podgzewana') done
(13084, 'do2018r') done
(13085, 'udukomentowane') done
(13086, 'wd1201m') done
(13087, 'wd9531j') done
(13088, 'wd0223j') done
(13089, 'rivoli') done
(13090, 'ledvision') done
(13091, 'hiundai') done
(13092, '40netto') done
(13093, 'innovision') done
(13094, 'termin') done
(13095, 'powerxtra') done
(13096, '561zł') done
(13097, 'wd7454k') done
(13098, 'wd0943l') done
(13099, 'zs763jx') done
(13100, 'wd0826j') done
(13101, 'wd9985l') done
(13102, 'wd9041l') done
(13103, 'gd435mt') done
(13104, 'tempoat') done
(13105, 'extreme') done
(13106, '120000km') done
(13107, 'd4d143') done
(13108, 'kr5x141') done
(13109, 'wd9984l') done
(13110, 'el8g194') done
(13111, 'wd0836j') done
(13112, 'wd9968l') done
(13113, 'przyjedziesz') done
(13114, 'kupisz') done
(13115, 'gle350') done
(13116, '1sz') done
(13117, '795km') done
(13118, 'kwota') done
(13119, 'bhdi') done
(13120, 'holowanie') done
(13121, '22tys') done
(13122, '

(13422, 'ed30') done
(13423, 'myway') done
(13424, 'comfor') done
(13425, 'krajowej') done
(13426, 'dealerskiej') done
(13427, 'zaprraszam') done
(13428, 'ahh') done
(13429, 'pompie') done
(13430, 'poliftowego') done
(13431, 'uderzona') done
(13432, 'nietuzinkowe') done
(13433, 'chlodnice') done
(13434, 'mjd') done
(13435, 'luxurypack') done
(13436, 'ravat23') done
(13437, '0hdi241ps') done
(13438, 'exclusiveplus') done
(13439, 'obejrzec') done
(13440, 'wpomaganie') done
(13441, '187ps') done
(13442, 'join') done
(13443, 'asysten') done
(13444, 'manuala') done
(13445, '794') done
(13446, 'cla180') done
(13447, '44kw') done
(13448, '52tys') done
(13449, 'afs') done
(13450, "alu17''aso") done
(13451, '83300km') done
(13452, 'finansoanie') done
(13453, 'tourana') done
(13454, 'delikatnieuszk') done
(13455, 'zarejestrpl') done
(13456, '795') done
(13457, 'autostradowy') done
(13458, '5td5') done
(13459, 'toyo') done
(13460, 'j24b') done
(13461, 'dostawadodomu') done
(13462, '153000km') don

(13764, 'tucosn') done
(13765, 'selectione') done
(13766, '114d') done
(13767, '105km;') done
(13768, 'sport;') done
(13769, 'klima;') done
(13770, 'zadbana;') done
(13771, 'zgorzelec') done
(13772, 'autko:') done
(13773, 'm550') done
(13774, '79900mil') done
(13775, 'japonska') done
(13776, 'roweru') done
(13777, 'okazia') done
(13778, 'bwypadkowy') done


In [None]:
analysis.to_csv('analyse_variables_results.csv')

In [3]:
analysis = pd.read_csv('analyse_variables_results.csv')

In [4]:
analysis.drop(columns = 'Unnamed: 0', inplace = True)

In [10]:
variables = analysis['variable'].values.tolist()

In [11]:
cleaned_variables = [x for x in variables if str(x) != 'nan' and len(x) >=3 ] #let's remove everything shorter than 3 characters

In [48]:
x.add_dummies2(cleaned_variables)

<__main__.CarData at 0x1bd47c4fd08>

In [13]:
x.data.to_csv('training_set_21-06-2020.csv')

In [14]:
x.data

Unnamed: 0,ID,title,price,sub_title,mileage_km,engine_cm3,engine_type,province,negotiable,concat_title_subtitle,...,szy,ele,van,ass,grz,nisk,bez,kier,ani,and
0,1,Mazda CX-3,69700,150KM 6AT 4x4 SkyPASSION (+ Biala skóra + Safe...,51015.0,1998,Benzyna,Śląskie,,cx 3 1 6at skysion biala s safety i,...,0,0,0,0,0,0,0,0,0,0
1,2,Peugeot Partner,19500,Opłacony~Dokumentacja Przebiegu,180000.0,1600,Diesel,Mazowieckie,Do negocjacji,tner ay dokumentacja eu,...,0,0,0,0,0,0,0,0,0,0
2,3,Kia Sportage,26800,Opłacona~Napęd 4x4,165000.0,2000,Benzyna,Mazowieckie,Do negocjacji,age aa napęd,...,0,0,0,0,0,0,0,0,0,0
3,4,Škoda Roomster,12900,Opłacona~Serwis ASO~Klimatronic~Grzane siedzenia,238000.0,1600,Benzyna,Mazowieckie,Do negocjacji,ša roomster aa wis tro ne siedzenia,...,0,0,0,0,0,0,0,0,0,0
4,5,Opel Combo,14500,Klimatyzacja~Przebieg~Faktura~Polski Salon,136000.0,1400,Benzyna,Mazowieckie,"Do negocjacji, Faktura VAT",combo e fura,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4827,62074,Ford Focus 1.6,23000,Ford Focus,107000.0,1560,Diesel,Łódzkie,Do negocjacji,1 6,...,0,0,0,0,0,0,0,0,0,0
4828,62075,Mini Cooper S,22500,,135340.0,2000,Diesel,Wielkopolskie,Do negocjacji,mini cooper s nan,...,0,0,0,0,0,0,0,0,0,0
4829,62076,Toyota Corolla 1.4,8900,Toyota Corolla 1.4,157500.0,1398,Benzyna,Dolnośląskie,Do negocjacji,clla 1 4 clla 1 4,...,0,0,0,0,0,0,0,0,0,0
4830,62077,Toyota Corolla Verso,12900,Toyota Corolla Verso,206000.0,1995,Diesel,Małopolskie,Do negocjacji,clla verso clla verso,...,0,0,0,0,0,0,0,0,0,0


In [49]:
y = x.data['price'].copy()

In [50]:
x.add_dummies(x.data['engine_type'].unique().tolist(), ['engine_type'])

<__main__.CarData at 0x1bd47c4fd08>

In [51]:
x.add_dummies(x.data['province'].unique().tolist(), ['province'])

<__main__.CarData at 0x1bd47c4fd08>

In [56]:
x.data.head()

Unnamed: 0,ID,title,price,sub_title,mileage_km,year,engine_cm3,engine_type,city,province,negotiable,concat_title_subtitle,alfa,audi,bmw,chevrolet,chrysler,citroen,dacia,daewoo,dodge,fiat,ford,honda,hyundai,jaguar,jeep,kia,rover,lexus,mazda,mercedes,mitsubishi,nissan,opel,peugeot,porsche,renault,seat,smart,subaru,suzuki,tesla,toyota,volkswagen,volvo,skoda,drive,fv23,v23,20d,pakiet,akiet,pak,quattro,vat,dsg,quat,fort,line,for,akt,kam,amer,fak,led,tsi,pre,klimatyzacja,ful,salon,180,sal,tyl,corsa,polo,klima,klim,raw,aso,tiv,16v,elektr,drzw,ima,drz,gaz,fabia,opłat,elekt,lim,sport,elek,50km,0td,niemie,asa,sprowadzon,spro,wad,niem,150k,lpg,clio,fiesta,fiest,dobry,4x4,ład,opł,201,zadbana,200,polska,alufelgi,pano,spr,benzyn,benzy,klas,alufelg,enzyna,alufel,sce,zadb,ram,man,150,105,felgi,bdb,spo,90km,gwa,pcja,felg,tec,citroën,focus,astra,jak,picasso,megan,astr,krajowy,qashqai,qashqa,zarej,yaris,nav,opc,reje,iii,kombi,jest,pic,zar,tronik,mega,dam,con,krajow,dti,polski,bard,alu,pan,ara,tom,aut,rej,nie,str,hdi,kom,seria,seri,kraj,szyb,golf,przed,polecam,poleca,gol,ins,nowy,wszy,titanium,kóra,wers,wer,rav,naw,sta,fab,pier,oryginal,pol,peł,okazja,okaz,orygin,skó,tra,oryg,prz,dach,wyp,super,grand,sup,lift,110,000,turb,bezw,avant,rok,lif,diesel,diese,idea,wła,tyś,pas,tdci,tys,cic,ser,dci,100,tdc,tour,bieg,tempo,140,mały,pryw,zam,cam,osob,ory,320,oso,tel,park,mondeo,monde,crd,nic,xen,avensis,bog,mat,40km,ben,kod,oro,gran,120,130,temp,sel,tdi,par,pro,fote,tem,sat,fot,tani,170,grza,pdc,octavia,octavi,sto,max,cta,pod,szy,ele,van,ass,grz,nisk,bez,kier,ani,and,engine_type_Benzyna,engine_type_Diesel,engine_type_Hybryda,engine_type_Elektryczny,province_slaskie,province_mazowieckie,province_lódzkie,province_lubelskie,province_malopolskie,province_wielkopolskie,province_warmińsko-mazurskie,province_lubuskie,province_kujawsko-pomorskie,province_podlaskie,province_podkarpackie,province_pomorskie,province_opolskie,province_swietokrzyskie,province_region midtjylland,province_zylina,province_moravian-silesian region,province_noord-brabant,province_wilno,province_noord-holland,province_region hovestaden,province_nordrhein-westfalen,province_languedoc-roussillon-midi-pyrénées,province_hessen,province_berlin,province_niedersachsen
0,1,Mazda CX-3,69700,150KM 6AT 4x4 SkyPASSION (+ Biala skóra + Safe...,51015.0,2015,1998,,Katowice,,,cx 3 1 6at skysion biala s safety i,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,Peugeot Partner,19500,Opłacony~Dokumentacja Przebiegu,180000.0,2009,1600,,Garbatka-Letnisko,,Do negocjacji,tner ay dokumentacja eu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3,Kia Sportage,26800,Opłacona~Napęd 4x4,165000.0,2009,2000,,Garbatka-Letnisko,,Do negocjacji,age aa napęd,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,Škoda Roomster,12900,Opłacona~Serwis ASO~Klimatronic~Grzane siedzenia,238000.0,2006,1600,,Garbatka-Letnisko,,Do negocjacji,ša roomster aa wis tro ne siedzenia,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,5,Opel Combo,14500,Klimatyzacja~Przebieg~Faktura~Polski Salon,136000.0,2009,1400,,Garbatka-Letnisko,,"Do negocjacji, Faktura VAT",combo e fura,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [75]:
X = pd.concat([x.data.iloc[: , [4,5,6]], x.data.iloc[: , 12:319]], axis = 1, sort = False)

In [76]:
X.to_csv('X_21_06_2020.csv')

In [77]:
stand_scale = StandardScaler()

In [78]:
X_scaled = stand_scale.fit_transform(X)

In [82]:
lin_reg = LogisticRegression()

In [None]:
lin_reg.fit(X_scaled, y)



In [None]:
y_pred = lin_reg.predict(y)

In [22]:
X.data.add_dummies()

Unnamed: 0,mileage_km
0,51015.0
1,180000.0
2,165000.0
3,238000.0
4,136000.0
...,...
4827,107000.0
4828,135340.0
4829,157500.0
4830,206000.0


In [14]:
x.add_dummies2(variables)

TypeError: first argument must be string or compiled pattern

In [4]:
results = pd.read_csv('results.csv')

In [55]:
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 10000)

In [58]:
data2 = AddDummy(data, 'title', 'ford')

In [3]:
x.data

NameError: name 'x' is not defined

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [20]:
pipe = make_pipeline(StandardScaler(), LogisticRegression(priors=None))

['Ala', 'MA', 'KOtA']