In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
%matplotlib inline
import seaborn as sns

In [11]:
# Notebook-wide plot styling; the three calls run in this order.
# NOTE(review): newer matplotlib releases appear to have renamed the bundled
# seaborn styles (e.g. 'seaborn-v0_8-white') -- confirm 'seaborn-white' still
# resolves on the installed version.
style.use('seaborn-white')
# Apply seaborn's settings with the 'figure.figsize' rc value set to (13, 10).
sns.set(rc={'figure.figsize':(13,10)})
# Pass 'jet' as the 'image.cmap' entry of the seaborn style dictionary.
sns.set_style({'image.cmap': 'jet'})

### Introduction to Modeling Data

Chapter 1 Roadmap:

- Motivating Examples
- Data Visualization
- Descriptive Statistics

![roadtrip](img/ch01_ex02_fig01.png)

In [12]:
# Extent of the data: distance (y) in miles, time (x) in hours
y_range, x_range = 300, 6

# Speed estimate = distance covered divided by the time it took
mph = y_range / x_range
mph

50.0

### Visualizing a Model
![modelvix](img/ch01_ex02_fig02.png)

### Model Predictions

In [13]:
# The model written out as a Python expression: miles = 50 * hours
hours = 6
miles = hours * 50

# At time = 6 hours the model predicts a distance of 300 miles
time = 6
distance = time * 50
distance

300

In [14]:
def model(time):
    """Return the distance predicted for an elapsed ``time`` at a fixed rate of 50 per unit time."""
    speed = 50
    return speed * time


# Evaluate the model at a time of 10.
predicted_distance = model(time=10)

### Interpolation

- Interpolation is a model prediction for a point that lies between the points we have measured<br>
![inter](img/ch01_ex02_fig03.png)

### Extrapolation

- Extrapolation is a model prediction for a point that lies outside the range of the points we have measured<br>
![extra](img/ch01_ex03_fig03.png)

In [17]:
# Measured times: 0.0 through 6.0 inclusive, in steps of 1.0
times = np.arange(7, dtype=float)

# Measured distance at each of the times above
distances = np.array([
    0.0,
    44.04512153,
    107.16353484,
    148.43674052,
    196.39705633,
    254.4358147,
    300.0,
])

In [18]:
# Net change over the whole record: last sample minus first sample
total_time = times[-1] - times[0]
total_distance = distances[-1] - distances[0]

# The ratio of the two changes is the slope of the data, i.e. the average speed
average_speed = total_distance / total_time

# Apply that slope to a time that was not measured to predict the distance
elapse_time = 2.5
distance_traveled = elapse_time * average_speed
print("The distance traveled is {}".format(distance_traveled))

The distance traveled is 125.0


In [19]:
def model(time, a0=0, a1=50):
    """
    Evaluate the linear distance model ``a0 + a1*time``.

    Args:
        time (float, np.ndarray): elapsed time in units of hours
        a0 (float): zeroth-order coefficient (the intercept); default 0
        a1 (float): first-order coefficient (the slope); default 50
    Returns:
        float or np.ndarray: model distance for each input time; an array
        input gives an array of the same length/size.
    """
    return a0 + a1 * time

In [20]:
# Pick a time that was not measured and ask the model for the distance.
time = 8
distance = model(time)

# Does the predicted trip fit within 400, i.e. can it be made without refueling?
answer = distance <= 400

# Show the predicted distance, then the verdict, each on its own line.
print(distance, answer, sep="\n")

400
True


In [23]:
# Fuel used ('gallons') and distance covered ('miles') for each car.
# NOTE(review): car1 and car2 hold identical values here -- confirm that is
# intended, since the comparison made later can then only report a tie.
car1 = {
    'gallons': np.array([
        0.0, 1.66666667, 3.33333333, 5.0, 6.66666667, 8.33333333,
        10.0, 11.66666667, 13.33333333, 15.0, 16.66666667,
    ]),
    'miles': np.array([
        0.0, 50.0, 100.0, 150.0, 200.0, 250.0,
        300.0, 350.0, 400.0, 450.0, 500.0,
    ]),
}
car2 = {
    'gallons': np.array([
        0.0, 1.66666667, 3.33333333, 5.0, 6.66666667, 8.33333333,
        10.0, 11.66666667, 13.33333333, 15.0, 16.66666667,
    ]),
    'miles': np.array([
        0.0, 50.0, 100.0, 150.0, 200.0, 250.0,
        300.0, 350.0, 400.0, 450.0, 500.0,
    ]),
}

In [25]:
# NOTE: this silences NumPy divide-by-zero / invalid-value warnings for the
# whole session. It is kept so that other cells behave exactly as before;
# efficiency_model below does not rely on it.
np.seterr(divide='ignore', invalid='ignore')


def efficiency_model(miles, gallons):
    """
    Estimate fuel efficiency as the mean of miles / gallons.

    Samples where ``gallons`` is zero are left out of the average. A sample
    such as 0 miles on 0 gallons would otherwise evaluate to NaN (0/0) and
    turn the whole mean into NaN, which makes every later ``>`` / ``<``
    comparison False.

    Args:
        miles (float, np.ndarray): distance covered at each sample
        gallons (float, np.ndarray): fuel used at each sample; must be
            broadcastable against ``miles``
    Returns:
        float: mean of miles / gallons over the samples with non-zero
        gallons, or NaN when no such sample exists.
    """
    # Promote scalars to 1-d and align shapes so one mask indexes both inputs.
    miles_arr, gallons_arr = np.broadcast_arrays(
        np.atleast_1d(np.asarray(miles, dtype=float)),
        np.atleast_1d(np.asarray(gallons, dtype=float)),
    )

    usable = gallons_arr != 0
    if not usable.any():
        # Nothing to average: report "undefined" explicitly.
        return np.nan

    return np.mean(miles_arr[usable] / gallons_arr[usable])

# Estimate each car's efficiency and store it under that car's 'mpg' key.
car1['mpg'] = efficiency_model(miles=car1['miles'], gallons=car1['gallons'])
car2['mpg'] = efficiency_model(miles=car2['miles'], gallons=car2['gallons'])

# Report which car is more efficient. If either value is NaN, both
# comparisons are False and the final branch runs.
if car2['mpg'] < car1['mpg']:
    print('car1 is the best')
elif car2['mpg'] > car1['mpg']:
    print('car2 is the best')
else:
    print('the cars have the same efficiency')

the cars have the same efficiency
