# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [7]:
cars1 = pd.read_csv("cars1.csv")
cars2 = pd.read_csv("cars2.csv")
print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [9]:
cars1.columns
cars1 = cars1.loc[:,:"car"]
cars1

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


### Step 5. What is the number of observations in each dataset?

In [10]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [22]:
cars = cars1.merge(cars2,how="outer")
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,9.0,8,304,193,4732,18.5,70,1,hi 1200d
1,10.0,8,307,200,4376,15.0,70,1,chevy c20
2,10.0,8,360,215,4615,14.0,70,1,ford f250
3,11.0,8,318,210,4382,13.5,70,1,dodge d200
4,11.0,8,350,180,3664,11.0,73,1,oldsmobile omega
...,...,...,...,...,...,...,...,...,...
393,43.4,4,90,48,2335,23.7,80,2,vw dasher (diesel)
394,44.0,4,97,52,2130,24.6,82,2,vw pickup
395,44.3,4,90,48,2085,21.7,80,2,vw rabbit c (diesel)
396,44.6,4,91,67,1850,13.8,80,3,honda civic 1500 gl


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [23]:
owners = np.random.randint(low=15000,high=73001, size=398, dtype='l')
owners

array([25241, 56772, 47054, 60221, 45434, 18369, 20082, 50384, 59188,
       68212, 50817, 36838, 38807, 52242, 15937, 47119, 58116, 52567,
       50787, 35218, 16089, 42174, 63347, 43059, 72065, 63273, 29226,
       38801, 49030, 50521, 35406, 47380, 34115, 24977, 17421, 22107,
       24935, 42012, 23742, 71401, 49118, 21782, 41112, 33274, 67087,
       35034, 49357, 15563, 63639, 68675, 31501, 68678, 58620, 33030,
       46334, 63970, 40616, 70051, 44338, 41485, 56563, 39148, 54723,
       56975, 50308, 69771, 44900, 47319, 47861, 15774, 72486, 24473,
       38873, 31411, 53601, 50773, 16840, 29840, 17467, 71243, 52873,
       29521, 27971, 49422, 57717, 65480, 72616, 42689, 19595, 44312,
       32673, 16970, 56782, 21153, 40322, 61149, 49777, 69523, 30655,
       38887, 34143, 52768, 68452, 70423, 28159, 56359, 58033, 58205,
       57434, 23714, 24796, 71642, 36544, 54477, 37611, 32593, 39075,
       36369, 54716, 72567, 70820, 53245, 54273, 66149, 58312, 71770,
       66062, 61717,

### Step 8. Add the column owners to cars

In [25]:
cars["owners"] = owners
cars.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,9.0,8,304,193,4732,18.5,70,1,hi 1200d,25241
1,10.0,8,307,200,4376,15.0,70,1,chevy c20,56772
2,10.0,8,360,215,4615,14.0,70,1,ford f250,47054
3,11.0,8,318,210,4382,13.5,70,1,dodge d200,60221
4,11.0,8,350,180,3664,11.0,73,1,oldsmobile omega,45434
