# MPG Cars

### Introduction:

The following exercise uses data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [1]:
import numpy as np
import pandas as pd

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign each to a variable called cars1 and cars2

In [4]:
# Both CSV files are hosted in the pandas_exercises repository on GitHub.
cars1_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv'
cars2_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv'

cars1 = pd.read_csv(cars1_url)
cars2 = pd.read_csv(cars2_url)

# Plain-text preview of the first five rows of each frame, one after the other.
print(cars1.head(), cars2.head(), sep='\n')

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [5]:
# Keep only the contiguous run of named columns, "mpg" through "car".
# Label slices in .loc include both endpoints, so "car" itself is retained
# and the blank "Unnamed: N" columns that follow it are dropped.
named_columns = slice("mpg", "car")
cars1 = cars1.loc[:, named_columns]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [6]:
# (rows, columns) of each dataset, one tuple per line.
print(cars1.shape, cars2.shape, sep='\n')

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [9]:
# Stack the two frames row-wise with the public pd.concat API.
# (DataFrame._append is a private method and may change without notice.)
#
# Each frame's original row labels are carried over unchanged, so labels
# repeat in the result. Pass ignore_index=True if a fresh 0..n-1 index is
# wanted instead.
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a Series of random numbers from 15,000 to 73,000.

In [10]:
# Seeded generator so that Restart & Run All reproduces the same values.
rng = np.random.default_rng(seed=42)

# Draw one integer per row of `cars` rather than hard-coding the row count,
# so the array always fits the frame it is attached to in the next step.
# endpoint=True makes the upper bound inclusive: values lie in
# [15000, 73000]. The default dtype is int64 on every platform.
nr_owners = rng.integers(15000, 73000, size=len(cars), endpoint=True)
nr_owners

array([64249, 39911, 55309, 33669, 21779, 68252, 66433, 54758, 17006,
       63470, 53712, 59171, 16470, 57735, 33390, 43674, 52434, 65365,
       61792, 24270, 36629, 66955, 64543, 39879, 72738, 17014, 28030,
       69267, 33407, 44647, 37882, 66308, 30129, 39479, 63263, 29775,
       33040, 53394, 50219, 26064, 61944, 66521, 38693, 15027, 18400,
       51987, 68123, 42533, 55955, 30453, 40854, 36854, 56220, 37914,
       46511, 44394, 63040, 61803, 42104, 59622, 34116, 49080, 27192,
       55667, 64190, 19700, 19515, 35094, 47773, 42989, 71160, 56057,
       66804, 19871, 57686, 36379, 69034, 71687, 67454, 57339, 15587,
       61449, 35817, 54784, 70326, 35212, 55399, 26283, 71705, 62918,
       71615, 28994, 71669, 45971, 45868, 41632, 17607, 59488, 40352,
       42324, 40520, 40275, 57107, 67912, 69708, 23884, 60585, 39090,
       63119, 36670, 51848, 54553, 45353, 71332, 65368, 22564, 66023,
       32260, 70741, 69054, 34234, 46645, 67064, 43237, 41464, 29175,
       67090, 45807,

### Step 8. Add the column owners to cars

In [12]:
# Attach the random owner counts as a new "owners" column on `cars`.
# nr_owners is a NumPy array, so its length must equal the number of rows
# in `cars`, and values are matched to rows by position.
cars['owners'] = nr_owners
# Show the frame so the new column can be checked.
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,64249
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,39911
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,55309
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,33669
4,17.0,8,302,140,3449,10.5,70,1,ford torino,21779
...,...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,46461
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,15360
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,50558
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,17918
