How to Fix: Input contains NaN, infinity or a value too large for dtype('float64')

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build the example DataFrame: two predictors (x1, x2) and a response (y).
# Two entries are deliberately non-finite: x2 contains an infinity (row 7)
# and y starts with a NaN (row 0).
df = pd.DataFrame(
    dict(
        x1=[1, 2, 2, 4, 2, 1, 5, 4, 2, 4, 4],
        x2=[1, 3, 3, 5, 2, 2, 1, np.inf, 0, 3, 4],
        y=[np.nan, 78, 85, 88, 72, 69, 94, 94, 88, 92, 90],
    )
)

In [3]:
# view the full DataFrame; note the NaN in y (row 0) and the inf in x2 (row 7)
print(df)

    x1   x2     y
0    1  1.0   NaN
1    2  3.0  78.0
2    2  3.0  85.0
3    4  5.0  88.0
4    2  2.0  72.0
5    1  2.0  69.0
6    5  1.0  94.0
7    4  inf  94.0
8    2  0.0  88.0
9    4  3.0  92.0
10   4  4.0  90.0


In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
# instantiate an unfitted linear regression estimator (no arguments, so defaults apply)
model = LinearRegression()

In [6]:
# predictors: the two feature columns of the raw (uncleaned) DataFrame
X = df[['x1', 'x2']]
# response: the y column of the same DataFrame
y = df['y']

In [7]:
# Attempt to fit the regression model on the raw data.
# This call is expected to fail: y contains a NaN and x2 contains an infinity,
# and the fit raises a ValueError for such input. The exception is the point of
# this cell, so it is caught and printed as "<type>: <message>" instead of being
# left to propagate, which would halt a "Restart Kernel -> Run All".
try:
    model.fit(X, y)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [8]:
# Attempt to print the model intercept and coefficients.
# Because the preceding fit on the raw data failed, the estimator has no
# `intercept_` attribute yet and the lookup raises an AttributeError. Catch it
# and print it as "<type>: <message>" so the failure is shown without aborting
# the rest of the notebook.
try:
    print(model.intercept_, model.coef_)
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'LinearRegression' object has no attribute 'intercept_'

# How to Fix the Error

In [9]:
# Keep only the rows in which every column holds a finite number.
# np.isfinite is False for NaN as well as for +/-inf, and .all(axis=1)
# reduces across the columns of each row, giving one boolean per row.
df_new = df.loc[
    np.isfinite(df).all(axis=1)
]

In [10]:
# view the cleaned DataFrame; original row labels are kept, so labels 0 and 7 are now absent
print(df_new)

    x1   x2     y
1    2  3.0  78.0
2    2  3.0  85.0
3    4  5.0  88.0
4    2  2.0  72.0
5    1  2.0  69.0
6    5  1.0  94.0
8    2  0.0  88.0
9    4  3.0  92.0
10   4  4.0  90.0


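The two rows that contained non-finite values have been removed: row 0 (where `y` was NaN) and row 7 (where `x2` was infinite). The remaining nine rows keep their original index labels.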

#### We can now proceed to fit our linear regression model:

In [11]:
from sklearn.linear_model import LinearRegression

In [12]:
# instantiate a fresh, unfitted linear regression estimator
# (rebinds `model`, replacing the earlier instance whose fit failed)
model = LinearRegression()

In [13]:
# predictors: the two feature columns of the cleaned DataFrame
X = df_new[['x1', 'x2']]
# response: the y column of the cleaned DataFrame
y = df_new['y']

In [14]:
# fit the regression model on the cleaned data; X and y now come from df_new,
# which holds only finite values, so the fit completes and the cell displays
# the fitted estimator
model.fit(X, y)

LinearRegression()

In [15]:
# print the fitted intercept, followed by the coefficient array
# (one coefficient per predictor column in X: x1, x2)
print(model.intercept_, model.coef_)

69.85144124168515 [ 5.72727273 -0.93791574]


##### Notice that we don’t receive any error this time because we first removed the rows with infinite or NaN values from the DataFrame.