# Multiple Linear Regression Assignment

#### Importing all the necessary libraries

In [None]:
# Silence every warning category for the rest of the notebook.
# NOTE(review): this also hides pandas/seaborn deprecation and chained-assignment
# warnings raised by later cells.
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Relax pandas' display limits so wide and long frames are printed without truncation.
for option_name, option_value in {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 1000,
    'display.expand_frame_repr': False,
}.items():
    pd.set_option(option_name, option_value)

  ### Reading the csv file

In [None]:
# Location of the raw CSV. The original author's absolute path stays the default;
# set the CAR_PRICE_CSV environment variable to run the notebook on another machine.
DATA_PATH = os.environ.get("CAR_PRICE_CSV", r"D:\Data Science and Analytics\CarPrice_Assignment.csv")
CarPrices = pd.read_csv(DATA_PATH)
CarPrices.head()

### Understanding and Handling the data set

In [None]:
# (rows, columns) of the raw frame.
CarPrices.shape

In [None]:
# Column dtypes and non-null counts.
CarPrices.info()

In [None]:
# Number of missing values per column.
CarPrices.isnull().sum()

In [None]:
# Summary statistics of the numeric columns.
CarPrices.describe()

In [None]:
# Inspect the raw CarName strings before splitting them.
CarPrices['CarName']

In [None]:
# Split CarName at the first space: the leading word is the manufacturer and
# everything after it is the model. (Taking only the last word, as before, turned
# e.g. "audi 100 ls" into the model "ls".) Names without a space get an empty model.
name_parts = CarPrices['CarName'].str.partition(' ')
CarPrices['CarCompany'] = name_parts[0].str.capitalize()
CarPrices['CarModel'] = name_parts[2]

In [None]:
# Frequency of each manufacturer name; used to spot misspellings.
CarPrices['CarCompany'].value_counts()

In [None]:
# Replace the misspelt manufacturer names with their canonical spelling.
# The result is assigned back to the column: calling replace(..., inplace=True) on
# CarPrices['CarCompany'] operates on a selected Series and is not guaranteed to
# update the DataFrame (it does not under pandas Copy-on-Write).
company_spelling_fixes = {
    'Maxda': 'Mazda',
    'Porcshce': 'Porsche',
    'Toyouta': 'Toyota',
    'Vokswagen': 'Volkswagen',
    'Vw': 'Volkswagen',
}
CarPrices['CarCompany'] = CarPrices['CarCompany'].replace(company_spelling_fixes)

In [None]:
# Re-check the manufacturer counts after the spelling fixes.
CarPrices['CarCompany'].value_counts()

In [None]:
# CarName has been split into CarCompany / CarModel, so the original column goes.
CarPrices = CarPrices.drop(columns=['CarName'])

## Making the symboling column more readable
    0 and 1 : Moderate
    -2 and -1 : Safe
    2 and 3 : Risky

In [None]:
# Distribution of the numeric symboling codes before relabelling.
CarPrices['symboling'].value_counts()

In [None]:
# Collapse the six numeric symboling codes into three labels.
symboling_labels = {
    -2: 'safe',
    -1: 'safe',
    0: 'moderate',
    1: 'moderate',
    2: 'risky',
    3: 'risky',
}
CarPrices['symboling'] = CarPrices['symboling'].map(symboling_labels)

In [None]:
# Distribution of the relabelled symboling categories.
CarPrices['symboling'].value_counts()

#### Observing all Categorical variables

In [None]:
# Level counts for fueltype.
CarPrices['fueltype'].value_counts()

In [None]:
# Level counts for aspiration.
CarPrices['aspiration'].value_counts()

In [None]:
# Level counts for doornumber.
CarPrices['doornumber'].value_counts()

In [None]:
# Level counts for carbody.
CarPrices['carbody'].value_counts()

In [None]:
# Level counts for drivewheel.
CarPrices['drivewheel'].value_counts()

In [None]:
# Level counts for enginelocation.
CarPrices['enginelocation'].value_counts()

In [None]:
# Level counts for enginetype.
CarPrices['enginetype'].value_counts()

In [None]:
# Level counts for cylindernumber.
CarPrices['cylindernumber'].value_counts()

In [None]:
# Level counts for fuelsystem.
CarPrices['fuelsystem'].value_counts()

### Observing all Numerical variables

In [None]:
# Distribution of every numeric predictor, drawn as panels of ONE 5x3 figure.
# The figure is created once, before the loop: creating it inside the loop opened a
# new 20x20 figure per variable, each with a single small panel in it.
numerical_vars = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight', 'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg']
plt.figure(figsize = [20,20])
for panel, column in enumerate(numerical_vars, start=1):
    plt.subplot(5,3,panel)
    sns.distplot(CarPrices[column])
plt.show()

## A few numerical variables contain a large number of outliers
     So we deal with these outliers first.

In [None]:
# Print the 1st/98th percentiles of the skewed columns, and the 1st/90th percentiles
# of compressionratio, as candidate capping thresholds.
tail_columns = ['carwidth', 'carheight', 'boreratio', 'stroke', 'peakrpm',
                'citympg', 'highwaympg', 'horsepower', 'curbweight', 'enginesize']
tail_quantiles = CarPrices[tail_columns].quantile([0.01, .98])
compression_quantiles = CarPrices[['compressionratio']].quantile([0.01, .90])
print(tail_quantiles)
print(compression_quantiles)

In [None]:
# Upper cap for each skewed numeric column.
# NOTE(review): the values look like they were read off the quantile table printed
# by the previous cell - confirm if the data changes.
upper_caps = {
    'carwidth': 71.676,
    'carheight': 59.068,
    'boreratio': 3.78,
    'stroke': 3.8968,
    'peakrpm': 6000,
    'citympg': 38.00,
    'highwaympg': 46.92,
    'curbweight': 3768.40,
    'horsepower': 206.44,
    'enginesize': 256.08,
    'compressionratio': 10.94,
}
for column, cap in upper_caps.items():
    # clip + assignment updates the DataFrame column itself. The previous chained
    # form CarPrices[col][mask] = cap writes into a temporary Series and is not
    # guaranteed to reach the frame (notably when a float cap hits an int column).
    CarPrices[column] = CarPrices[column].clip(upper=cap)

In [None]:
# Re-plot the numeric predictors after capping, as panels of ONE 5x3 figure.
# The figure is created once, before the loop: creating it inside the loop opened a
# new 20x20 figure per variable, each with a single small panel in it.
numerical_vars = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight', 'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg']
plt.figure(figsize = [20,20])
for panel, column in enumerate(numerical_vars, start=1):
    plt.subplot(5,3,panel)
    sns.distplot(CarPrices[column])
plt.show()

###### After capping, the distributions of the numerical variables look reasonable.

In [None]:
# Price against every categorical predictor, one box plot per panel of a 4x3 grid.
categorical_columns = [
    'symboling', 'fueltype', 'aspiration', 'doornumber', 'carbody',
    'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem',
]
plt.figure(figsize = [20,16])
for panel, column in enumerate(categorical_columns, start=1):
    plt.subplot(4,3,panel)
    sns.boxplot(x = column, y = 'price', data = CarPrices)
plt.show()

In [None]:
# Price spread per manufacturer on one large figure.
plt.figure(figsize=[20, 20])
sns.boxplot(data=CarPrices, x='CarCompany', y='price')
plt.show()

## Reading each individual car company is difficult
### So, we categorize them based on their average price in the market
    - <10000 : C class
    - 10000 to 20000 : B class
    - >20000 : A class

In [None]:
# Mean price per manufacturer, most expensive first.
CarPrices.groupby('CarCompany').price.mean().sort_values(ascending=False)

In [None]:
# Price tier (A/B/C) for each manufacturer. Keys must match the cleaned CarCompany
# values, i.e. the first word of CarName after capitalize() and the spelling fixes.
# 'Chevrolet' was previously misspelt 'Cheverolet', so those rows mapped to NaN.
company_class_dict = {
    'Chevrolet' : 'C',
    'Dodge' : 'C',
    'Plymouth' : 'C',
    'Honda' : 'C',
    'Subaru' : 'C',
    'Isuzu' : 'C',
    'Mitsubishi' : 'C',
    'Renault' : 'C',
    'Toyota' : 'C',
    'Volkswagen' : 'B',
    'Nissan' : 'B',
    'Mazda' : 'B',
    'Saab' : 'B',
    'Peugeot' : 'B',
    'Alfa-romero' : 'B',
    'Mercury' : 'B',
    'Audi' : 'B',
    'Volvo' : 'B',
    'Bmw' : 'A',
    'Buick' : 'A',
    'Porsche' : 'A',
    'Jaguar' : 'A',
    }
CarPrices['company_class'] = CarPrices['CarCompany'].map(company_class_dict)
# CarCompany itself is kept here; it is removed later when CarPrices1 is built.
CarPrices.head()

# Now changing Categorical variables to Binary (1/0)
### If a categorical variable has more than two levels, we use dummy variables

In [None]:
# One indicator column per price tier. dtype=int keeps the indicators numeric:
# pandas >= 2.0 returns bool columns by default, which makes the design matrix
# object-typed and breaks the statsmodels OLS / VIF calls further down.
carcompany = pd.get_dummies(CarPrices['company_class'], dtype=int)
carcompany.columns = ['A_class', 'B_class', 'C_class']
carcompany

In [None]:
# C_class becomes the baseline tier, so its indicator is removed.
carcompany = carcompany.drop(columns=['C_class'])

In [None]:
# Append the tier indicators to the main frame, column-wise.
CarPrices = pd.concat((CarPrices, carcompany), axis='columns')

In [None]:
# Display the frame with the tier indicators attached.
CarPrices

In [None]:
# CarPrices1 is the modelling frame; it starts as CarPrices without CarCompany.
CarPrices1 = CarPrices.drop(columns=['CarCompany'])

In [None]:
# CarModel is not used as a predictor.
CarPrices1 = CarPrices1.drop(columns=['CarModel'])

In [None]:
# company_class is already represented by the A_class / B_class indicators.
CarPrices1 = CarPrices1.drop(columns=['company_class'])

In [None]:
# Display the modelling frame so far.
CarPrices1

In [None]:
# Indicators for symboling with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
symboling = pd.get_dummies(CarPrices['symboling'], drop_first=True, dtype=int)
symboling.columns = ['symboling_risky', 'symboling_safe']

In [None]:
# Attach the symboling indicators.
CarPrices1 = pd.concat((CarPrices1, symboling), axis='columns')

In [None]:
# The raw symboling column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['symboling'])

In [None]:
# Indicators for fueltype. dtype=int keeps them numeric (pandas >= 2.0 defaults to
# bool, which breaks the statsmodels OLS / VIF calls further down).
# NOTE(review): both levels are kept (no drop_first), so the two columns always sum
# to 1 and are perfectly collinear with an intercept.
fueltype = pd.get_dummies(CarPrices['fueltype'], dtype=int)
fueltype.columns = ['fueltype_diesel','fueltype_gas']
fueltype

In [None]:
# Attach the fueltype indicators.
CarPrices1 = pd.concat((CarPrices1, fueltype), axis='columns')

In [None]:
# The raw fueltype column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['fueltype'])

In [None]:
# Indicators for aspiration. dtype=int keeps them numeric (pandas >= 2.0 defaults to
# bool, which breaks the statsmodels OLS / VIF calls further down).
# NOTE(review): both levels are kept (no drop_first), so the two columns always sum
# to 1 and are perfectly collinear with an intercept.
aspiration = pd.get_dummies(CarPrices['aspiration'], dtype=int)
aspiration.columns = ['aspiration_std', 'aspiration_turbo']
aspiration

In [None]:
# Attach the aspiration indicators.
CarPrices1 = pd.concat((CarPrices1, aspiration), axis='columns')

In [None]:
# The raw aspiration column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['aspiration'])

In [None]:
# Indicators for doornumber. dtype=int keeps them numeric (pandas >= 2.0 defaults to
# bool, which breaks the statsmodels OLS / VIF calls further down).
# NOTE(review): both levels are kept (no drop_first), so the two columns always sum
# to 1 and are perfectly collinear with an intercept.
doornumber = pd.get_dummies(CarPrices['doornumber'], dtype=int)
doornumber.columns = ['doornumber_four', 'doornumber_two']

In [None]:
# Display the doornumber indicator columns.
doornumber

In [None]:
# Attach the doornumber indicators.
CarPrices1 = pd.concat((CarPrices1, doornumber), axis='columns')

In [None]:
# The raw doornumber column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['doornumber'])

In [None]:
# Indicators for carbody with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
carbody = pd.get_dummies(CarPrices['carbody'], drop_first=True, dtype=int)
carbody.columns = ['carbody_hardtop', 'carbody_hatchback', 'carbody_sedan', 'carbody_wagon']
carbody

In [None]:
# Attach the carbody indicators.
CarPrices1 = pd.concat((CarPrices1, carbody), axis='columns')

In [None]:
# The raw carbody column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['carbody'])

In [None]:
# Indicators for drivewheel with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
drivewheel = pd.get_dummies(CarPrices['drivewheel'], drop_first=True, dtype=int)
drivewheel.columns = ['drivewheel_fwd', 'drivewheel_rwd']
drivewheel

In [None]:
# Attach the drivewheel indicators.
CarPrices1 = pd.concat((CarPrices1, drivewheel), axis='columns')

In [None]:
# The raw drivewheel column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['drivewheel'])

In [None]:
# Indicators for enginelocation. dtype=int keeps them numeric (pandas >= 2.0 defaults
# to bool, which breaks the statsmodels OLS / VIF calls further down).
# NOTE(review): both levels are kept (no drop_first), so the two columns are perfectly
# collinear with an intercept; the elimination cells later drop each of them by name.
enginelocation = pd.get_dummies(CarPrices['enginelocation'], dtype=int)
enginelocation.columns = ['enginelocation_front', 'enginelocation_rear']
enginelocation

In [None]:
# Attach the enginelocation indicators.
CarPrices1 = pd.concat((CarPrices1, enginelocation), axis='columns')

In [None]:
# The raw enginelocation column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['enginelocation'])

In [None]:
# Indicators for enginetype with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
enginetype = pd.get_dummies(CarPrices['enginetype'], drop_first=True, dtype=int)
enginetype.columns = ['enginetype_dohcv', 'enginetype_l', 'enginetype_ohc', 'enginetype_ohcf', 'enginetype_ohcv', 'enginetype_rotor']
enginetype

In [None]:
# Attach the enginetype indicators.
CarPrices1 = pd.concat((CarPrices1, enginetype), axis='columns')

In [None]:
# The raw enginetype column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['enginetype'])

In [None]:
# Indicators for cylindernumber with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
# NOTE(review): the last column is named 'cylinder_two', unlike its siblings; the
# name is kept unchanged so that downstream output stays the same.
cylindernumber = pd.get_dummies(CarPrices['cylindernumber'], drop_first=True, dtype=int)
cylindernumber.columns = ['cylindernumber_five', 'cylindernumber_four', 'cylindernumber_six', 'cylindernumber_three', 'cylindernumber_twelve', 'cylinder_two']
cylindernumber

In [None]:
# Attach the cylindernumber indicators.
CarPrices1 = pd.concat((CarPrices1, cylindernumber), axis='columns')

In [None]:
# The raw cylindernumber column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['cylindernumber'])

In [None]:
# Indicators for fuelsystem with the first level dropped as the baseline.
# dtype=int keeps them numeric (pandas >= 2.0 defaults to bool, which breaks the
# statsmodels OLS / VIF calls further down).
fuelsystem = pd.get_dummies(CarPrices['fuelsystem'], drop_first = True, dtype=int)
fuelsystem.columns = ['fuelsystem_2bbl', 'fuelsystem_4bbl', 'fuelsystem_idi', 'fuelsystem_mfi','fuelsystem_mpfi', 'fuelsystem_spdi', 'fuelsystem_spfi']
fuelsystem

In [None]:
# Attach the fuelsystem indicators.
CarPrices1 = pd.concat((CarPrices1, fuelsystem), axis='columns')

In [None]:
# The raw fuelsystem column is replaced by its indicators.
CarPrices1 = CarPrices1.drop(columns=['fuelsystem'])

In [None]:
# car_ID is removed from the modelling frame.
CarPrices1 = CarPrices1.drop(columns=['car_ID'])

In [None]:
# Display the fully encoded modelling frame.
CarPrices1

In [None]:
# (rows, columns) after encoding.
CarPrices1.shape

# Now we split the data set into train and test sets

In [None]:
# 70 % of the rows go to training; the seed is fixed so the split is reproducible.
df_train, df_test = train_test_split(
    CarPrices1,
    train_size=0.7,
    random_state=100,
)

In [None]:
# Size of the training split.
df_train.shape

In [None]:
# Size of the test split.
df_test.shape

In [None]:
# Dtypes and non-null counts of the encoded frame.
CarPrices1.info()

## Now we rescale the Numerical variables using MinMaxScaler

In [None]:
# Continuous predictors that will be min-max scaled.
num_vars = [
    'wheelbase', 'carlength', 'carwidth', 'carheight',
    'curbweight', 'enginesize', 'boreratio', 'stroke',
    'compressionratio', 'horsepower', 'peakrpm',
    'citympg', 'highwaympg',
]

In [None]:
# Learn each column's min/max from the training rows only, then rescale them.
Scaler = MinMaxScaler()
Scaler.fit(df_train[num_vars])
df_train[num_vars] = Scaler.transform(df_train[num_vars])

In [None]:
# Display the scaled training frame.
df_train

In [None]:
# Summary statistics of the training frame after scaling.
df_train.describe()

In [None]:
# Annotated correlation heatmap of the training frame.
train_corr = df_train.corr()
plt.figure(figsize=[100, 100])
sns.heatmap(train_corr, annot=True, cmap='YlGnBu')
plt.show()

# Now we start Building the model
### Here we use both automated and manual methods to remove the variables

In [None]:
# Separate the target from the predictors. pop() removes 'price' from df_train
# in place, and X_train is the same object as df_train.
X_train = df_train
y_train = X_train.pop('price')

In [None]:
# Display the training predictors.
X_train

In [None]:
# Baseline OLS fit on every predictor plus an intercept column.
X_train_sm = sm.add_constant(X_train)

# Both sides are cast to float before they are handed to statsmodels.
endog_float = y_train.astype(float)
exog_float = X_train_sm.astype(float)
lr = sm.OLS(endog_float, exog_float)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every training predictor, largest first.
vif_scores = [
    variance_inflation_factor(X_train.values, position)
    for position in range(X_train.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

# First we use the RFE method to remove variables

In [None]:
# Recursive feature elimination down to 15 predictors with a linear model.
lm = LinearRegression()
lm.fit(X_train, y_train)
rfe = RFE(estimator=lm, n_features_to_select=15).fit(X_train, y_train)

# (feature, selected?, elimination rank) for every candidate column.
a = [
    (feature, selected, rank)
    for feature, selected, rank in zip(X_train.columns, rfe.support_, rfe.ranking_)
]

# Names of the predictors RFE kept.
col = X_train.columns[rfe.support_]

In [None]:
# Columns selected by RFE.
col

In [None]:
# (column, support flag, ranking) for every predictor, as reported by RFE.
a

## From here we use the manual method to eliminate variables
#### We eliminate the variables based on Significance (p-values) and VIF

In [None]:
# OLS on the RFE-selected predictors, with an intercept column added.
X_train_rfe = sm.add_constant(X_train[col])
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

### We go on eliminating variables until we get the best results
##### We accept the variable if both the conditions are met :
            - p-value    : < 0.05
            - VIF-value  : < 5

In [None]:
# Elimination step: refit without 'enginelocation_front'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['enginelocation_front']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'wheelbase'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['wheelbase']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'highwaympg'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['highwaympg']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'carbody_hardtop'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['carbody_hardtop']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'curbweight'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['curbweight']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'boreratio'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['boreratio']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'enginelocation_rear'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['enginelocation_rear']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'enginetype_rotor'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['enginetype_rotor']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'carbody_sedan'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['carbody_sedan']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the current design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

In [None]:
# Elimination step: refit without 'carbody_wagon'.
X_train_rfe = sm.add_constant(X_train_rfe.drop(columns=['carbody_wagon']))
lr = sm.OLS(y_train, X_train_rfe)
lr_model = lr.fit()

# Coefficients, p-values and fit statistics.
lr_model.summary()

In [None]:
# Variance inflation factor of every column in the final design matrix, largest first.
vif_scores = [
    variance_inflation_factor(X_train_rfe.values, position)
    for position in range(X_train_rfe.shape[1])
]
VIF = (
    pd.DataFrame({'Features': X_train_rfe.columns, 'VIF value': vif_scores})
    .round({'VIF value': 2})
    .sort_values(by='VIF value', ascending=False)
)

In [None]:
# Display the VIF table.
VIF

##### Our model is ready, now we check the residuals

In [None]:
# Fitted prices on the training rows, needed for the residual check.
y_train_pred = lr_model.predict(exog=X_train_rfe)
y_train_pred

In [None]:
# Residuals (actual minus fitted) and their distribution.
res = y_train.sub(y_train_pred)
sns.distplot(res)
plt.show()

### And our model follows the assumptions of Linear Regression

# Now we predict and evaluate the model on test set

In [None]:
# Rescale the test rows with the scaler fitted on the training rows (transform only).
scaled_test_values = Scaler.transform(df_test[num_vars])
df_test[num_vars] = scaled_test_values
df_test

In [None]:
# Summary statistics of the scaled test frame.
df_test.describe()

In [None]:
# Separate the target from the predictors. pop() removes 'price' from df_test
# in place, and X_test is the same object as df_test.
X_test = df_test
y_test = X_test.pop('price')

In [None]:
# Add the intercept column to the test predictors.
# has_constant='add' forces the 'const' column to be created: with the default
# ('skip'), add_constant silently adds nothing if any test column happens to be
# constant (e.g. a rare dummy that is all zeros in the test split), and the later
# column selection would then fail on the missing 'const'.
X_test_sm = sm.add_constant(X_test, has_constant='add')

In [None]:
# Keep exactly the columns of the final training design matrix, in the SAME order.
# statsmodels applies the coefficients positionally when predicting, so selecting by
# VIF['Features'] (which is sorted by VIF value) paired coefficients with the wrong
# columns.
X_test_rfe = X_test_sm[X_train_rfe.columns]

In [None]:
# Display the test design matrix.
X_test_rfe

In [None]:
# Predicted prices for the test rows from the final model.
y_test_pred = lr_model.predict(exog=X_test_rfe)

In [None]:
# R-squared of the final model on the test set.
r2_score(y_test, y_test_pred)

#### We can see that the equation of our best fitted line is:

price = 1526.6774 + 13480 x carwidth + 12590 x horsepower + 9991.3 x A_class - 1488.43 x carbody_hatchback + 5112.75 x cylindernumber_three

This equation shows how the price of the vehicle changes with a unit change in any one of these independent variables when all other variables are held constant.
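For example, the price of the car changes by 13480 for every unit change in carwidth if all other variables are held constant, and the same reading applies to the other variables. Because carwidth and horsepower were min-max scaled before fitting, a "unit change" in them means moving from the smallest to the largest value seen in the training data.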
The predictor carbody_hatchback suggests that the price of the car decreases by 1488.43 when the car body is a hatchback.
The price of the car increases by 12590 for each unit increase in (scaled) horsepower.
The cylinder_number_three increases the car price by 5112.75
The company name is also essential in determining the price of the automobile. A-class companies such as BMW, Buick, Porsche and Jaguar add a further 9991.3 to the price.

In [None]:
# Mean car price over the full data set.
CarPrices['price'].mean()

#### Suggestions:
The Chinese company Geely Automotives, which is entering the US market, should consider the points listed below.

Average US car price: 13000 USD(approx).
For profitability and effective pricing of cars in this new market, the model's price predictors variables have to be regulated and balanced effectively to meet certain price levels and gain an edge over the other competitors.