# US Data Wrangling

In [1]:
### reading data in
import pandas as pd
import csv

url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
df = pd.read_csv(url)
###this just gets rid of scientific notation in the pandas display and replaces it with a float to 3 decimals
pd.set_option('display.float_format', lambda x: '%.3f' % x)
#this just turns off an error I kept getting... hahaha
pd.options.mode.chained_assignment = None  # default='warn'
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [2]:
#just an option to print 15 rows
pd.set_option('display.max_rows', 15)

## Adding columns we need

In [3]:
### create unique ID for to separate county/st with same name. not all counties have fips codes listed 

df['uniqueId'] = df["county"] +", "+ df["state"]
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths,uniqueId
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0,"Snohomish, Washington"
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0,"Snohomish, Washington"
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0,"Snohomish, Washington"
3,2020-01-24,Cook,Illinois,17031.0,1,0.0,"Cook, Illinois"
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0,"Snohomish, Washington"


In [9]:
### gets list of state/county combos (uniqueId) and iterates through them to ensure they all have the proper # of rows
cleanedData = pd.DataFrame()
listUnique = df.uniqueId.unique()
groups = df.groupby('uniqueId')


for x in listUnique:
    county = groups.get_group(x)
    
    ### need to figure out how to broadly apply over a county, not whole dataset 
    
    ### get daily case counts
    county['daily cases'] = county['cases'].diff()


    ### get daily deaths
    county['daily deaths'] = county['deaths'].diff()

    ###replace null values with nearest non-null value
    try:
        county = county.interpolate(method='nearest')
    except ValueError:
        print(x, 'has an issue')

    dfList= [cleanedData, county]
    
    cleanedData = pd.concat(dfList)
    print(x)
    


Snohomish, Washington
Cook, Illinois
Orange, California
Maricopa, Arizona
Los Angeles, California
Santa Clara, California
Suffolk, Massachusetts
San Francisco, California
Dane, Wisconsin
San Diego, California
Bexar, Texas
Douglas, Nebraska
Humboldt, California
Sacramento, California
Solano, California
Spokane, Washington
Salt Lake, Utah
Marin, California
Napa, California
Sonoma, California
Washington, Oregon
King, Washington
Alameda, California
Hillsborough, Florida
Manatee, Florida
New York City, New York
Unknown, Rhode Island
Placer, California
San Mateo, California
Fulton, Georgia
Norfolk, Massachusetts
Grafton, New Hampshire
Walla Walla, Washington
Contra Costa, California
Wake, North Carolina
Bergen, New Jersey
Westchester, New York
Fort Bend, Texas
Chelan, Washington
Douglas, Colorado
Jefferson, Colorado
Santa Rosa, Florida
Montgomery, Maryland
Middlesex, Massachusetts
Clark, Nevada
Washoe, Nevada
Nassau, New York
Williamson, Tennessee
Harris, Texas
Pinal, Arizona
Yolo, Californi

Lincoln, Arkansas
Nevada, California
Escambia, Florida
Lake, Florida
Forsyth, Georgia
Hall, Georgia
Paulding, Georgia
Troup, Georgia
Peoria, Illinois
Will, Illinois
Bartholomew, Indiana
Floyd, Indiana
Ascension, Louisiana
Unknown, Maine
Frederick, Maryland
Talbot, Maryland
Unknown, Massachusetts
Benton, Minnesota
Blue Earth, Minnesota
Hancock, Mississippi
Monroe, Mississippi
Cass, Missouri
Carroll, New Hampshire
Hunterdon, New Jersey
Somerset, New Jersey
Allegany, New York
Onondaga, New York
Ontario, New York
Durham, North Carolina
Geauga, Ohio
Canadian, Oklahoma
Benton, Oregon
Fairfield, South Carolina
Sevier, Tennessee
Bowie, Texas
Matagorda, Texas
Tooele, Utah
Wasatch, Utah
Orange, Vermont
Stafford, Virginia
York, Virginia
Outagamie, Wisconsin
Wood, Wisconsin
Madison, Alabama
St. Clair, Alabama
Ketchikan Gateway Borough, Alaska
Navajo, Arizona
Kern, California
Monterey, California
Sussex, Delaware
Brevard, Florida
Polk, Florida
Richmond, Georgia
Whitfield, Georgia
Hawaii, Hawaii
Mad

Darlington, South Carolina
Florence, South Carolina
Orangeburg, South Carolina
Pickens, South Carolina
Sumter, South Carolina
Anderson, Tennessee
Blount, Tennessee
Bradley, Tennessee
Dickson, Tennessee
Greene, Tennessee
Hamblen, Tennessee
Maury, Tennessee
Tipton, Tennessee
Brown, Texas
Cass, Texas
Fannin, Texas
Hockley, Texas
Upshur, Texas
Addison, Vermont
Rutland, Vermont
Accomack, Virginia
Gloucester, Virginia
Newport News city, Virginia
Norfolk city, Virginia
Portsmouth city, Virginia
Cowlitz, Washington
San Juan, Washington
Stevens, Washington
Jackson, West Virginia
Kanawha, West Virginia
Chippewa, Wisconsin
Douglas, Wisconsin
Dunn, Wisconsin
Green, Wisconsin
Jefferson, Wisconsin
Marathon, Wisconsin
Rock, Wisconsin
St. Croix, Wisconsin
Campbell, Wyoming
Natrona, Wyoming
Marion, Alabama
Apache, Arizona
Cochise, Arizona
Butte, California
El Dorado, California
Siskiyou, California
Yuba, California
Chaffee, Colorado
Elbert, Colorado
Hinsdale, Colorado
Montrose, Colorado
Bay, Florida
Hi

Mayes, Oklahoma
Wagoner, Oklahoma
Clatsop, Oregon
Armstrong, Pennsylvania
Bradford, Pennsylvania
Carbon, Pennsylvania
Clearfield, Pennsylvania
Juniata, Pennsylvania
Somerset, Pennsylvania
Chesterfield, South Carolina
Brookings, South Dakota
Claiborne, Tennessee
DeKalb, Tennessee
Grundy, Tennessee
Hardin, Tennessee
Lincoln, Tennessee
Madison, Tennessee
Overton, Tennessee
Chambers, Texas
Comal, Texas
Falls, Texas
Grayson, Texas
Hidalgo, Texas
Jefferson, Texas
Nueces, Texas
Parker, Texas
Unknown, Texas
Victoria, Texas
Caledonia, Vermont
Orleans, Vermont
Chesapeake city, Virginia
Halifax, Virginia
Unknown, Virginia
Unknown, Washington
Berkeley, West Virginia
Harrison, West Virginia
Preston, West Virginia
Wood, West Virginia
Sweetwater, Wyoming
Blount, Alabama
Butler, Alabama
Cherokee, Alabama
Chilton, Alabama
Clay, Alabama
Cleburne, Alabama
Colbert, Alabama
Dallas, Alabama
Etowah, Alabama
Lawrence, Alabama
Marshall, Alabama
Pickens, Alabama
Pike, Alabama
Russell, Alabama
Wilcox, Alabama
Dr

Pleasants, West Virginia
Iron, Wisconsin
Marinette, Wisconsin
Richland, Wisconsin
Waupaca, Wisconsin
Goshen, Wyoming
Washakie, Wyoming
Monroe, Alabama
Gila, Arizona
Johnson, Arkansas
Glenn, California
Baca, Colorado
Moffat, Colorado
Hendry, Florida
Bulloch, Georgia
Jenkins, Georgia
Murray, Georgia
Pike, Georgia
Walton, Georgia
Wheeler, Georgia
Bonneville, Idaho
Lincoln, Idaho
Carroll, Illinois
Fayette, Illinois
Macon, Illinois
Crawford, Indiana
Newton, Indiana
Switzerland, Indiana
Vermillion, Indiana
Wabash, Indiana
White, Indiana
Boone, Iowa
Keokuk, Iowa
Shelby, Iowa
Taylor, Iowa
Miami, Kansas
Boyle, Kentucky
Bracken, Kentucky
Breckinridge, Kentucky
Butler, Kentucky
Carroll, Kentucky
Floyd, Kentucky
Grant, Kentucky
Nicholas, Kentucky
Shelby, Kentucky
Washington, Kentucky
East Carroll, Louisiana
Franklin, Louisiana
Osceola, Michigan
Clearwater, Minnesota
Amite, Mississippi
Clarke, Mississippi
Covington, Mississippi
Ripley, Missouri
Warren, Missouri
Gosper, Nebraska
Platte, Nebraska
Hum

Unknown, West Virginia
Crawford, Wisconsin
Menominee, Wisconsin
Coffee, Alabama
Unknown, Alaska
Yukon-Koyukuk Census Area, Alaska
Mississippi, Arkansas
Monroe, Arkansas
Ouachita, Arkansas
Sharp, Arkansas
Del Norte, California
Custer, Colorado
Telfair, Georgia
Washington, Idaho
Logan, Illinois
Macoupin, Illinois
Mercer, Illinois
Moultrie, Illinois
Piatt, Illinois
Blackford, Indiana
Spencer, Indiana
Bremer, Iowa
Jefferson, Iowa
O'Brien, Iowa
Cloud, Kansas
Cowley, Kansas
Bath, Kentucky
Crittenden, Kentucky
Cumberland, Kentucky
Lincoln, Kentucky
Meade, Kentucky
Pendleton, Kentucky
Caldwell, Louisiana
Allegany, Maryland
Dickinson, Michigan
Koochiching, Minnesota
Wayne, Mississippi
Howell, Missouri
New Madrid, Missouri
Colfax, Nebraska
Gage, Nebraska
Hamilton, Nebraska
Otoe, Nebraska
Yates, New York
Dare, North Carolina
Gates, North Carolina
Macon, North Carolina
Mitchell, North Carolina
Stokes, North Carolina
Guernsey, Ohio
Hardin, Ohio
Williams, Ohio
Atoka, Oklahoma
Kingfisher, Oklahoma
Ki

Benton, Indiana
Pike, Indiana
Kearny, Kansas
Stanton, Kansas
Morgan, Kentucky
Owsley, Kentucky
Trigg, Kentucky
Red Lake, Minnesota
Rock, Minnesota
Todd, Minnesota
Dakota, Nebraska
Sully, South Dakota
Walworth, South Dakota
Grant, West Virginia
Mingo, West Virginia
Big Horn, Wyoming
Jackson, Arkansas
Johnson, Illinois
Cass, Iowa
Montmorency, Michigan
Churchill, Nevada
Jackson, North Carolina
Alfalfa, Oklahoma
Hyde, South Dakota
Lake, Tennessee
Van Buren, Tennessee
Motley, Texas
Falls Church city, Virginia
Green Lake, Wisconsin
Clay, Illinois
Dickinson, Kansas
Casey, Kentucky
Garrard, Kentucky
Livingston, Kentucky
Saline, Nebraska
Tyrrell, North Carolina
Johnston, Oklahoma
Jefferson, Oregon
Armstrong, Texas
Duval, Texas
Wilbarger, Texas
Lincoln, West Virginia
Washburn, Wisconsin
Nome Census Area, Alaska
Union, Illinois
Smith, Kansas
Alcona, Michigan
Norman, Minnesota
Daviess, Missouri
Holt, Missouri
Miller, Missouri
Pondera, Montana
Morrill, Nebraska
Harrison, Ohio
Moore, Tennessee
Bosqu

Barton, Missouri
Sioux, Nebraska
Custer, Montana
Rich, Utah
Cook, Minnesota
Mellette, South Dakota
Schleicher, Texas
Allen, Kansas
Dade, Missouri
Bennett, South Dakota
Beaver, Utah
Dickenson, Virginia
Fergus, Montana
Valley, Montana
Sutton, Texas
Upton, Texas
Bland, Virginia
Ozark, Missouri
Wayne, Missouri
Dawson, Montana
Billings, North Dakota
Cavalier, North Dakota
Culberson, Texas
Ellis, Oklahoma
Thomas, Kansas
Menard, Texas
Harlan, Nebraska
Terrell, Texas
Logan, Kansas
Marshall, Kansas
Treasure, Montana
Rock, Nebraska
Denali Borough, Alaska
Jackson, Colorado
Boise, Idaho
Issaquena, Mississippi
Joplin, Missouri
Granite, Montana
Edwards, Texas
Boundary, Idaho
Clearwater, Idaho
Elk, Kansas
Sheridan, Nebraska
Eureka, Nevada
Hettinger, North Dakota
Jeff Davis, Texas
Rush, Kansas
Bear Lake, Idaho
Oneida, Idaho
Robertson, Kentucky
Hooker, Nebraska
Harmon, Oklahoma
Calhoun, Arkansas
Teton, Montana
Kusilvak Census Area, Alaska
Shoshone, Idaho
Scott, Illinois
Sheridan, Montana
Campbell, Sout

## Additional levels of cleaning 

In [10]:
import numpy as np

In [19]:
### clean data by removing dates and unknown counties and clears outliers
cleanedData = cleanedData[cleanedData['date'] > '2020-04-30']
cleanedData = cleanedData[cleanedData['date'] < '2021-01-01']
cleanedData = cleanedData[cleanedData['county'] != 'Unknown']
### remove puerto rico, missing most of the data
cleanedData = cleanedData[cleanedData['state'] != 'Puerto Rico']

### gets rid of negative values and sets equal to zero
cleanedData['daily cases'] = cleanedData['daily cases'].clip(lower=0, upper = 60000)
cleanedData['daily deaths'] = cleanedData['daily deaths'].clip(lower=0, upper = 60000)


listUnique = cleanedData.uniqueId.unique()
for x in listUnique:
    print(x)


Snohomish, Washington
Cook, Illinois
Orange, California
Maricopa, Arizona
Los Angeles, California
Santa Clara, California
Suffolk, Massachusetts
San Francisco, California
Dane, Wisconsin
San Diego, California
Bexar, Texas
Douglas, Nebraska
Humboldt, California
Sacramento, California
Solano, California
Spokane, Washington
Salt Lake, Utah
Marin, California
Napa, California
Sonoma, California
Washington, Oregon
King, Washington
Alameda, California
Hillsborough, Florida
Manatee, Florida
New York City, New York
Placer, California
San Mateo, California
Fulton, Georgia
Norfolk, Massachusetts
Grafton, New Hampshire
Walla Walla, Washington
Contra Costa, California
Wake, North Carolina
Bergen, New Jersey
Westchester, New York
Fort Bend, Texas
Chelan, Washington
Douglas, Colorado
Jefferson, Colorado
Santa Rosa, Florida
Montgomery, Maryland
Middlesex, Massachusetts
Clark, Nevada
Washoe, Nevada
Nassau, New York
Williamson, Tennessee
Harris, Texas
Pinal, Arizona
Yolo, California
Denver, Colorado
Eag

## Testing to ensure data is clean

In [20]:
### save list of unique county/state combos 
np.savetxt('countylist.txt', listUnique, fmt='%s')

In [21]:
### groups dataframe based on uniqueId
groupedCleanedData = cleanedData.groupby('uniqueId')

In [22]:
### this is proof that we can circle through each county dataframe one at a time 
for x in listUnique:
    county = groupedCleanedData.get_group(x)
    print(x, county.isnull().sum())
 
    #print(county.describe())
    #print(county.isnull().sum())

Snohomish, Washington date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cook, Illinois date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Orange, California date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Maricopa, Arizona date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Los Angeles, California date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Santa Clara, California dat

dtype: int64
Montcalm, Michigan date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
St. Clair, Michigan date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Washtenaw, Michigan date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Dakota, Minnesota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Hennepin, Minnesota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Stearns, Minnes

Hancock, Mississippi date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Monroe, Mississippi date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cass, Missouri date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Carroll, New Hampshire date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Hunterdon, New Jersey date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Somerset, New Jersey dat

dtype: int64
Morgan, Colorado date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Tolland, Connecticut date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Windham, Connecticut date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Hernando, Florida date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Indian River, Florida date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Jackson, Flor

Pickens, South Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Sumter, South Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Anderson, Tennessee date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Blount, Tennessee date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Bradley, Tennessee date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Dickson, Tennessee da

dtype: int64
Madison, New York date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
St. Lawrence, New York date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cherokee, North Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cumberland, North Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Davie, North Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int

Jefferson, Montana date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Saunders, Nebraska date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cibola, New Mexico date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Curry, New Mexico date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Chemung, New York date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Orleans, New York date          

dtype: int64
Juneau, Wisconsin date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Monroe, Wisconsin date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Albany, Wyoming date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Bullock, Alabama date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Choctaw, Alabama date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Coosa, Alabama date       

Taylor, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Uvalde, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Willacy, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Young, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Uintah, Utah date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Bristol city, Virginia date            0
county         

Clay, Georgia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Cook, Georgia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Dade, Georgia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Dooly, Georgia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Gilmer, Georgia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Jefferson, Georgia date            0
county         

San Augustine, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Wood, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Augusta, Virginia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Sussex, Virginia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Barbour, West Virginia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Crawford, Wisconsin date          

Pender, North Carolina date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Emmons, North Dakota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Monroe, Ohio date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Sherman, Oregon date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Fulton, Pennsylvania date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Henderson, Tennessee date      

Tensas, Louisiana date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Alpena, Michigan date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Lake, Michigan date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Becker, Minnesota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Andrew, Missouri date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Iron, Missouri date            0
county 

dtype: int64
Harper, Kansas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Hitchcock, Nebraska date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Garza, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Glasscock, Texas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64
Brown, Illinois date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Calhoun, Iowa date            0

Kingman, Kansas date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Haakon, South Dakota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Webster, West Virginia date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Towner, North Dakota date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Hickory, Missouri date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     1
daily deaths    1
dtype: int64
Logan, North Dakota date  

In [37]:
#there are values that did not interpolate, should be filled with zeros after looking at the data
cleanedData = cleanedData.fillna(0)

###proof we have no null values!
cleanedData.isnull().sum()

date            0
county          0
state           0
fips            0
cases           0
deaths          0
uniqueId        0
daily cases     0
daily deaths    0
dtype: int64

In [30]:
cleanedData.describe()

Unnamed: 0,fips,cases,deaths,daily cases,daily deaths
count,755087.0,755767.0,755767.0,755453.0,755453.0
mean,30438.391,2238.045,59.741,24.933,0.379
std,15272.997,10870.901,476.952,144.104,2.575
min,1001.0,1.0,0.0,0.0,0.0
25%,18169.0,63.0,1.0,0.0,0.0
50%,29157.0,313.0,5.0,3.0,0.0
75%,45091.0,1204.0,25.0,14.0,0.0
max,78030.0,770915.0,25144.0,29174.0,625.0


In [38]:
cleanedData.to_csv('cleanedCovidData.csv')