# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd 
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
cars1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv')
cars2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv')

In [3]:
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [4]:
cars2.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [5]:
cars1=cars1.loc[:,'mpg':'car']
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [6]:
print('cars1 : ',cars1.shape[0],'rows')
print('cars2 : ',cars2.shape[0],'rows')

cars1 :  198 rows
cars2 :  200 rows


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [7]:
cars = pd.concat([cars1,cars2])
cars


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [8]:
owners=np.random.randint(1500,high=73001,size=398,dtype='l')
owners

array([12221, 44482, 48443,  8783, 56892, 56294,  8503, 24711, 13960,
        5341, 27831, 16350, 40755, 15114, 46675, 71334, 72501, 32607,
       38043, 67690, 42370, 13570, 42257, 59677, 11755, 20109, 27615,
       33483, 14823, 12855, 55631, 72068, 36455,  8216,  5498, 49990,
       36618, 10255, 25406, 29505, 56849, 29693, 21683, 55647, 11533,
       46254, 17427, 10388, 58599, 33934, 37466, 70635, 47422, 41125,
       63266, 33288, 61500, 54011, 15266, 64520, 28463,  4013, 25836,
       15350, 37962,  8360,  5784, 23881,  5414, 15659, 56826, 28781,
       71371, 11213, 19863, 19901, 14918, 18143,  7381, 70813, 25905,
       30151,  1968, 64023, 21008, 56997, 57316, 50865, 50002, 52300,
       12590, 71086, 14415, 25456, 66033, 29716, 71535, 16148,  5913,
       36211,  4034,  4911, 53400, 67779, 59779, 45865, 35993, 13900,
       47940, 46381, 30310, 51540, 43217, 33699, 52421, 63620, 35658,
       17159, 39706,  5527, 50967, 20339, 35673, 20220, 65859, 34658,
       67820,  3069,

### Step 8. Add the column owners to cars

In [9]:
cars['owners'] = owners
cars.head(10)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,12221
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,44482
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,48443
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,8783
4,17.0,8,302,140,3449,10.5,70,1,ford torino,56892
5,15.0,8,429,198,4341,10.0,70,1,ford galaxie 500,56294
6,14.0,8,454,220,4354,9.0,70,1,chevrolet impala,8503
7,14.0,8,440,215,4312,8.5,70,1,plymouth fury iii,24711
8,14.0,8,455,225,4425,10.0,70,1,pontiac catalina,13960
9,15.0,8,390,190,3850,8.5,70,1,amc ambassador dpl,5341
