In [1]:
import pandas as pd
import os

## Obtain different dataset data.gov

https://catalog.data.gov/dataset

In [2]:
from six.moves import urllib

# where the datasets will be placed
ROOT_DATA = "../../../ROOT_DATA/data_gov/"

def fetch_data_from_URL(housing_url, file_name, sub_dir="tmp", root_path=ROOT_DATA):
    placement_dir = os.path.join(root_path, sub_dir)
    if not os.path.isdir(placement_dir):
        os.makedirs(placement_dir)
    placement_path = os.path.join(placement_dir, file_name)
    # only download if not already present
    if not os.path.isfile(placement_path):
        urllib.request.urlretrieve(housing_url, placement_path)
    return placement_path

In [3]:
# working with excel: `conda install -c anaconda xlrd`
fsa_xlsx_path = fetch_data_from_URL("https://ifap.ed.gov/fedschcodelist/attachments/1617FedSchoolCodeList.xlsx",
                    "1617FedSchoolCodeList.xlsx", sub_dir="FSA")

In [4]:
df = pd.read_excel(fsa_xlsx_path, index_col="ID")
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6962 entries, 25969 to 32930
Data columns (total 9 columns):
SchoolCode    6962 non-null object
SchoolName    6962 non-null object
Address       6962 non-null object
City          6962 non-null object
StateCode     6962 non-null object
ZipCode       6962 non-null int64
Province      111 non-null object
Country       402 non-null object
PostalCode    229 non-null object
dtypes: int64(1), object(8)
memory usage: 543.9+ KB
None


In [5]:
print(df.head())

      SchoolCode                             SchoolName  \
ID                                                        
25969     B04724        WIDENER UNIV SCHOOL OF LAW - DE   
25970     B06171    CENTER FOR ADVANCED STUDIES OF PUER   
25971     B06511       PENTECOSTAL THEOLOGICAL SEMINARY   
25972     B07022  THE CHICAGO SCHOOL OF PROF PSYCHOLOGY   
25973     B07624   NATIONAL COLLEGE OF NATURAL MEDICINE   

                             Address        City StateCode  ZipCode Province  \
ID                                                                             
25969  4601 CONCORD PIKE/PO BOX 7474  WILMINGTON        DE    19803      NaN   
25970                     BOX S-4467    SAN JUAN        PR      902      NaN   
25971                    PO BOX 3330   CLEVELAND        TN    37320      NaN   
25972         325 NORTH WELLS STREET     CHICAGO        IL    60610      NaN   
25973                  049 SW PORTER    PORTLAND        OR    97201      NaN   

      Country PostalCode 

In [6]:
# TODO: more about ix
# ix combines loc and iloc
print(df.ix[25969])

SchoolCode                             B04724
SchoolName    WIDENER UNIV SCHOOL OF LAW - DE
Address         4601 CONCORD PIKE/PO BOX 7474
City                               WILMINGTON
StateCode                                  DE
ZipCode                                 19803
Province                                  NaN
Country                                   NaN
PostalCode                                NaN
Name: 25969, dtype: object


In [7]:
print(df.ix[25969, "City"])

WILMINGTON


In [8]:
df.ix[25969, "City"] = "WILLMINGTON"
print(df.ix[25969, "City"])

WILLMINGTON


In [9]:
print(df.ix[25969, ["City", "StateCode", "ZipCode"]])

City         WILLMINGTON
StateCode             DE
ZipCode            19803
Name: 25969, dtype: object


In [10]:
df.ix[25969, ["City", "StateCode", "ZipCode"]] = ["PARKFALLS","KS","00000"]
print(df.ix[25969, ["City", "StateCode", "ZipCode"]])

City         PARKFALLS
StateCode           KS
ZipCode          00000
Name: 25969, dtype: object


## Assign Multiple Rows

In [11]:
# if we opperated on this, we wouldn't be affecting the root df
mask = df["StateCode"] == "WY"

In [12]:
df2 = df[mask]
df2["StateCode"] = "wyoming"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [13]:
# unchanged
df[mask]

Unnamed: 0_level_0,SchoolCode,SchoolName,Address,City,StateCode,ZipCode,Province,Country,PostalCode
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
29113,3928,CASPER COLLEGE,125 COLLEGE DRIVE,CASPER,WY,82601,,,
29114,3929,EASTERN WYOMING COLLEGE,3200 WEST C STREET,TORRINGTON,WY,82240,,,
29115,3930,SHERIDAN COLLEGE,3059 COFFEEN AVE,SHERIDAN,WY,82801,,,
29116,3931,NORTHWEST COLLEGE,231 W 6TH ST,POWELL,WY,82435,,,
29117,3932,UNIVERSITY OF WYOMING,DEPARTMENT 3335,LARAMIE,WY,82071,,,
29118,3933,WESTERN WYOMING COMMUNITY COLLEGE,PO BOX 428,ROCK SPRINGS,WY,82902,,,
29287,5018,CENTRAL WYOMING COLLEGE,2660 PECK AVE,RIVERTON,WY,82501,,,
29806,9157,WYOTECH,4373 NORTH 3RD STREET,LARAMIE,WY,82072,,,
29820,9259,LARAMIE COUNTY COMMUNITY CLG,1400 EAST COLLEGE DR,CHEYENNE,WY,82007,,,
30795,15625,CHEEKS INTERNATIONAL ACDMY,207 W 18TH STREET,CHEYENNE,WY,82001,,,


In [14]:
# this is different, we now have a view of the df
df.ix[mask]

Unnamed: 0_level_0,SchoolCode,SchoolName,Address,City,StateCode,ZipCode,Province,Country,PostalCode
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
29113,3928,CASPER COLLEGE,125 COLLEGE DRIVE,CASPER,WY,82601,,,
29114,3929,EASTERN WYOMING COLLEGE,3200 WEST C STREET,TORRINGTON,WY,82240,,,
29115,3930,SHERIDAN COLLEGE,3059 COFFEEN AVE,SHERIDAN,WY,82801,,,
29116,3931,NORTHWEST COLLEGE,231 W 6TH ST,POWELL,WY,82435,,,
29117,3932,UNIVERSITY OF WYOMING,DEPARTMENT 3335,LARAMIE,WY,82071,,,
29118,3933,WESTERN WYOMING COMMUNITY COLLEGE,PO BOX 428,ROCK SPRINGS,WY,82902,,,
29287,5018,CENTRAL WYOMING COLLEGE,2660 PECK AVE,RIVERTON,WY,82501,,,
29806,9157,WYOTECH,4373 NORTH 3RD STREET,LARAMIE,WY,82072,,,
29820,9259,LARAMIE COUNTY COMMUNITY CLG,1400 EAST COLLEGE DR,CHEYENNE,WY,82007,,,
30795,15625,CHEEKS INTERNATIONAL ACDMY,207 W 18TH STREET,CHEYENNE,WY,82001,,,


In [15]:
df.ix[mask, "StateCode"] = "Wyoming"

In [16]:
# now modified
df[mask]

Unnamed: 0_level_0,SchoolCode,SchoolName,Address,City,StateCode,ZipCode,Province,Country,PostalCode
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
29113,3928,CASPER COLLEGE,125 COLLEGE DRIVE,CASPER,Wyoming,82601,,,
29114,3929,EASTERN WYOMING COLLEGE,3200 WEST C STREET,TORRINGTON,Wyoming,82240,,,
29115,3930,SHERIDAN COLLEGE,3059 COFFEEN AVE,SHERIDAN,Wyoming,82801,,,
29116,3931,NORTHWEST COLLEGE,231 W 6TH ST,POWELL,Wyoming,82435,,,
29117,3932,UNIVERSITY OF WYOMING,DEPARTMENT 3335,LARAMIE,Wyoming,82071,,,
29118,3933,WESTERN WYOMING COMMUNITY COLLEGE,PO BOX 428,ROCK SPRINGS,Wyoming,82902,,,
29287,5018,CENTRAL WYOMING COLLEGE,2660 PECK AVE,RIVERTON,Wyoming,82501,,,
29806,9157,WYOTECH,4373 NORTH 3RD STREET,LARAMIE,Wyoming,82072,,,
29820,9259,LARAMIE COUNTY COMMUNITY CLG,1400 EAST COLLEGE DR,CHEYENNE,Wyoming,82007,,,
30795,15625,CHEEKS INTERNATIONAL ACDMY,207 W 18TH STREET,CHEYENNE,Wyoming,82001,,,
