In [8]:
import pandas as pd
import psycopg2
import re

def df_nan_filter(df):
    """Return a cleaned copy of ``df`` with missing values handled.

    Steps, in order:
      1. Drop rows whose "Size" is null.
      2. Fill null "Languages" with "EN".
      3. Fill null "Price" with 0.0.
      4. Fill null "Average User Rating" with the median rating of the
         rows that survived step 1.
      5. Fill null "User Rating Count" with 1.
      6. Drop rows with a duplicated "ID", keeping the first occurrence.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "ID", "Size", "Languages", "Price",
        "Average User Rating" and "User Rating Count".

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's ``df`` is left unmodified. The original
        index labels of the surviving rows are preserved.
    """
    # Work on an explicit copy: filling columns of a boolean-mask slice in
    # place triggers SettingWithCopyWarning and, under pandas copy-on-write,
    # does not update the frame at all.
    cleaned = df[df["Size"].notnull()].copy()

    cleaned["Languages"] = cleaned["Languages"].fillna("EN")
    cleaned["Price"] = cleaned["Price"].fillna(0.0)
    # The median is taken after the "Size" filter and before de-duplication.
    rating_median = cleaned["Average User Rating"].median()
    cleaned["Average User Rating"] = cleaned["Average User Rating"].fillna(rating_median)
    cleaned["User Rating Count"] = cleaned["User Rating Count"].fillna(1)

    return cleaned.drop_duplicates(subset="ID")

import ast

def string_filter(s: str):
    """Normalise a raw text cell into a single-line ASCII string.

    Processing steps:
      1. Non-ASCII characters are discarded.
      2. Backslash escapes written literally in the text (``\\n``, ``\\t``,
         ``\\xNN`` ...) are interpreted by evaluating the text as the body
         of a Python bytes literal.
      3. ``\\uXXXX`` escapes are decoded, and any resulting non-ASCII
         characters are discarded.
      4. Tab, newline, carriage return, vertical tab and form feed become
         spaces, and runs of spaces collapse to one.
      5. Leading and trailing double quotes are stripped.

    Parameters
    ----------
    s : str
        The raw text.

    Returns
    -------
    str
        The cleaned text.
    """
    # A bytes literal may only contain ASCII characters, so drop the rest up
    # front; they would be discarded by the ASCII re-encode below anyway.
    ascii_text = s.encode('ascii', 'ignore').decode('ascii')

    try:
        raw = ast.literal_eval("b'''%s'''" % ascii_text)
    except (SyntaxError, ValueError):
        # Text that cannot be embedded in the literal (e.g. it ends with a
        # quote or a backslash) previously crashed the whole run.
        raw = None
    if not isinstance(raw, bytes):
        # Either the evaluation failed, or embedded ''' turned the literal
        # into some other expression: fall back to the text without escape
        # processing.
        raw = ascii_text.encode('ascii')

    # NOTE(review): a truncated \u escape (e.g. "\u12") still raises
    # UnicodeDecodeError here, as it did before -- confirm whether such
    # input can occur.
    text = raw.decode('raw_unicode_escape').encode('ascii', 'ignore').decode()
    text = re.sub('[\t\n\r\v\f]', ' ', text)
    text = re.sub(' +', ' ', text)
    return text.strip('"')

def change_date_format(date: str):
    """Reverse the first three '/'-separated fields of ``date``, joined by '-'.

    For example ``"11/07/2008"`` becomes ``"2008-07-11"``. Raises
    ``IndexError`` when ``date`` has fewer than three fields.
    """
    fields = date.split('/')
    first, second, third = fields[0], fields[1], fields[2]
    return "{}-{}-{}".format(third, second, first)
    
# Load the raw export and keep only the columns used further down.
df = pd.read_csv("appstore_games.csv")[[
    "ID", "Name", "Average User Rating", "User Rating Count",
    "Price", "Description", "Developer", "Age Rating",
    "Languages", "Size", "Primary Genre", "Genres",
    "Original Release Date", "Current Version Release Date",
]]

# Handle missing values and duplicated IDs.
df = df_nan_filter(df)

# Every cell is converted to str and then normalised by string_filter, so
# all columns hold strings from here on.
for col in df.columns:
    df[col] = df[col].apply(lambda cell: string_filter(str(cell)))

# Rewrite both date columns with change_date_format.
df["Original Release Date"] = df["Original Release Date"].apply(change_date_format)
df["Current Version Release Date"] = df["Current Version Release Date"].apply(change_date_format)

# Persist the cleaned frame (the index is written as the first column).
df.to_csv("filtered_appstore_games.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [20]:
# Print each column name next to its value in the row labelled 0.
for column in df.columns:
    print(column, df.loc[0, column])

ID 284921427
Name Sudoku
Average User Rating 4.0
User Rating Count 3553.0
Price 2.99
Description Join over 21,000,000 of our fans and download one of our Sudoku games today! Makers of the Best Sudoku Game of 2008, Sudoku (Free), we offer you the best selling Sudoku game for iPhone with great features and 1000 unique puzzles! Sudoku will give you many hours of fun and puzzle solving. Enjoy the challenge of solving Sudoku puzzles whenever or wherever you are using your iPhone or iPod Touch. OPTIONS All options are on by default, but you can turn them off in the Options menu Show Incorrect :: Shows incorrect answers in red. Smart Buttons :: Disables the number button when that number is completed on the game board. Smart Notes :: Removes the number from the notes in the box, column, and row that contains the cell with your correct answer. FEATURES 1000 unique handcrafted puzzles ALL puzzles solvable WITHOUT guessing Four different skill levels Challenge a friend Multiple color schemes "AL

In [11]:
def delete_table():
    """Drop the ``appstore_games`` table if it exists.

    Opens its own connection, commits the drop and always closes the
    connection, even when the statement fails. A missing table is not an
    error, so the notebook can be run against a fresh database.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment instead.
    conn = psycopg2.connect(
        database="appstore_games",
        host="localhost",
        user="postgres_user",
        password="12345"
    )
    try:
        with conn.cursor() as curr:
            # IF EXISTS keeps the first run from failing with "table does not exist".
            curr.execute('DROP TABLE IF EXISTS appstore_games;')
        conn.commit()
    finally:
        conn.close()

def create_table():
    """Create the ``appstore_games`` table.

    Opens its own connection, commits the CREATE TABLE statement and always
    closes the connection, even when the statement fails (for example
    because the table already exists).
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment instead.
    conn = psycopg2.connect(
        database="appstore_games",
        host="localhost",
        user="postgres_user",
        password="12345"
    )
    try:
        with conn.cursor() as curr:
            curr.execute("""CREATE TABLE appstore_games(
             Id bigint PRIMARY KEY,
             Name varchar,
             Avg_user_rating float,
             User_rating_count float,
             Price float,
             Description varchar,
             Developer varchar,
             Age_rating varchar,
             Languages varchar,
             Size float,
             Primary_genre varchar,
             Genres varchar,
             Release_date date,
             Last_update date
             );""")
        conn.commit()
    finally:
        conn.close()

def populate(df):
    """Insert every row of ``df`` into the ``appstore_games`` table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns listed in ``source_columns`` below.

    All rows are inserted in a single transaction that is committed at the
    end. The connection is always closed, even when an insert fails; in
    that case nothing is committed.
    """
    # DataFrame columns, in the same order as the INSERT column list.
    source_columns = [
        "ID", "Name", "Average User Rating", "User Rating Count",
        "Price", "Description", "Developer", "Age Rating",
        "Languages", "Size", "Primary Genre", "Genres",
        "Original Release Date", "Current Version Release Date",
    ]
    insert_sql = """INSERT INTO appstore_games
            (Id, Name, Avg_user_rating, User_rating_count, Price, Description, Developer,
            Age_rating, Languages, Size, Primary_genre, Genres, Release_date, Last_update
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    # One plain tuple per row, instead of building a Series per row via iloc.
    rows = list(df[source_columns].itertuples(index=False, name=None))

    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment instead.
    conn = psycopg2.connect(
        database="appstore_games",
        host="localhost",
        user="postgres_user",
        password="12345"
    )
    try:
        with conn.cursor() as curr:
            curr.executemany(insert_sql, rows)
        conn.commit()
    finally:
        conn.close()

def display_table():
    """Print the first 10 rows returned by ``SELECT * FROM appstore_games``.

    Opens its own connection and always closes it, even when the query
    fails.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment instead.
    conn = psycopg2.connect(
        database="appstore_games",
        host="localhost",
        user="postgres_user",
        password="12345"
    )
    try:
        with conn.cursor() as curr:
            curr.execute("""SELECT * FROM appstore_games""")
            # Only 10 rows are shown, so do not build a list of the whole table.
            for row in curr.fetchmany(10):
                print(row)
    finally:
        conn.close()

In [12]:
# Start from a clean slate: drop the appstore_games table, then recreate it
# with the schema defined in create_table().
delete_table()
create_table()

In [13]:
# Load the cleaned DataFrame into the appstore_games table.
populate(df)

In [16]:
# Sanity check: print the first rows stored in the table.
display_table()

(284921427, 'Sudoku', 4.0, 3553.0, 2.99, 'Join over 21,000,000 of our fans and download one of our Sudoku games today! Makers of the Best Sudoku Game of 2008, Sudoku (Free), we offer you the best selling Sudoku game for iPhone with great features and 1000 unique puzzles! Sudoku will give you many hours of fun and puzzle solving. Enjoy the challenge of solving Sudoku puzzles whenever or wherever you are using your iPhone or iPod Touch. OPTIONS All options are on by default, but you can turn them off in the Options menu Show Incorrect :: Shows incorrect answers in red. Smart Buttons :: Disables the number button when that number is completed on the game board. Smart Notes :: Removes the number from the notes in the box, column, and row that contains the cell with your correct answer. FEATURES 1000 unique handcrafted puzzles ALL puzzles solvable WITHOUT guessing Four different skill levels Challenge a friend Multiple color schemes "ALL notes": tap the "All notes" button on to show all the