# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd 
import numpy as np 

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
car1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv')
car2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv')
print(car1.head())
print(car2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [3]:
car1 = car1.loc[:, 'mpg':'car']
car1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [5]:
print(car1.shape)
print(car2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [6]:
cars = car1.append(car2)
cars

  cars = car1.append(car2)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [13]:
owners = np.random.randint(15000, high=73000, size=398, dtype='l')
owners

array([40638, 66492, 24013, 30784, 27789, 35245, 44333, 20496, 71911,
       50534, 67783, 41437, 22362, 24283, 43891, 57306, 69051, 23476,
       59269, 40171, 37354, 67977, 48202, 51092, 39583, 60207, 17596,
       64757, 55490, 62413, 72430, 69836, 33179, 33461, 71676, 63902,
       23555, 39896, 66555, 65440, 15861, 57557, 32130, 35271, 16153,
       15708, 46430, 72661, 50315, 38295, 63238, 64249, 19156, 69518,
       32767, 27720, 43615, 21125, 40234, 22425, 55495, 35572, 67138,
       48975, 40970, 66953, 17738, 24862, 57704, 27400, 56812, 28206,
       59010, 25291, 61271, 54153, 59102, 56115, 28220, 37239, 41726,
       63200, 50514, 57933, 43776, 60761, 28534, 29078, 58403, 27339,
       40513, 47050, 24252, 30553, 31774, 20630, 18581, 57935, 29911,
       27471, 68248, 58555, 15197, 53630, 38675, 40901, 61417, 50310,
       22711, 17046, 63995, 62272, 50332, 44079, 36519, 41779, 70794,
       27132, 30633, 22487, 46938, 51945, 18494, 59860, 71828, 64931,
       39947, 66522,

### Step 8. Add the column owners to cars

In [14]:
cars['owners'] = owners
cars.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,40638
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,66492
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,24013
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,30784
4,17.0,8,302,140,3449,10.5,70,1,ford torino,27789
