In [1]:
# The 'else' clause of a for loop

def find(seq, target):
    """
    Return the index of the first item of `seq` equal to `target`, or -1.

    Demonstrates the 'else' clause of a for loop: the 'else' suite runs
    only when the loop finishes without executing 'break'.

    """
    for index, item in enumerate(seq):
        if item != target:
            continue
        found_at = index
        break
    else:
        # The loop ran to completion without a break: no item matched.
        found_at = -1

    return found_at

## DICTIONARY AND DEFAULTDICT

In [3]:
# FILTERING A DICTIONARY: DROP THE KEYS THAT MATCH A CRITERION

d = dict(a=1, a3=3, b=4, r4=5)
# Rather than looping over the keys and deleting entries one by one,
# rebuild the dict keeping only the items whose key does not start with 'r'.
d = {key: value for key, value in d.items() if not key.startswith('r')}

d

{'a': 1, 'a3': 3, 'b': 4}

In [5]:
# COUNTING WITH DICTIONARY

from collections import defaultdict

colors = ['red', 'green', 'blue', 'blue', 'red', 'red']

# First method: a plain dict, where .get() supplies 0 for a color
# that has not been counted yet.
d = dict()
for color in colors:
    d[color] = 1 + d.get(color, 0)

# Second method: defaultdict(int). Reading a missing key creates it with
# the value int() == 0, so no explicit default is needed.
d = defaultdict(int)
for color in colors:
    d[color] = d[color] + 1

d

defaultdict(int, {'red': 3, 'green': 1, 'blue': 2})

In [None]:
# GROUPING WITH DICTIONARY
# Group the color names by their length; defaultdict(list) creates an empty
# list the first time a given length is seen.
colors = ['red', 'green', 'blue', 'blue', 'red', 'red']
d = defaultdict(list)
for color in colors:
    d[len(color)].append(color)

In [6]:
# DICTIONARY POPITEM

# popitem() removes one (key, value) pair from the dictionary and returns it
# as a tuple. Since Python 3.7 the pair removed is the most recently inserted
# one (LIFO order); older versions removed an arbitrary pair.

person = dict(name='Phill', age=22, salary=3500.0)

result = person.popitem()
print('person = ', person)
print('Return Value = ', result)

person =  {'name': 'Phill', 'age': 22}
Return Value =  ('salary', 3500.0)


## DATE & TIME

In [None]:
# CONVERT TIMESTAMP FROM UTC TO OTHER TIMEZONE 

def convert_time_from_utc(timestamp, to_time_zone='Australia/Melbourne', output_format=None):
    """
    Convert a UTC timestamp to another time zone.

    Args:
        timestamp: datetime object, or a string formatted as
                    'YYYY-MM-DD HH:MM:SS'. The value is treated as UTC: any
                    tzinfo already attached to a datetime is replaced by UTC,
                    not converted.

        to_time_zone: name of the target time zone, looked up with tz.gettz.
        output_format: 'datetime' to return a datetime object, 'str' to return
                    a 'YYYY-MM-DD HH:MM:SS' string, or any other string to use
                    as a strftime format.
                    If None, the return type mirrors the input type.

    Returns:
        datetime object or string presenting the datetime in the new time zone

    Raises:
        ValueError: if timestamp is neither a string nor a datetime, if a
                    string timestamp does not match 'YYYY-MM-DD HH:MM:SS', or
                    if to_time_zone is not a time zone known to tz.gettz.
    """
    # Local import so the function works whether the notebook bound the name
    # `datetime` to the module or to the class.
    import datetime as _dt

    default_format = '%Y-%m-%d %H:%M:%S'

    # Validate and parse the input regardless of output_format. Previously a
    # string was only parsed when output_format was None, so a string input
    # with an explicit output_format failed on timestamp.replace(tzinfo=...).
    if isinstance(timestamp, str):
        input_kind = 'str'
        timestamp = _dt.datetime.strptime(timestamp, default_format)
    elif isinstance(timestamp, _dt.datetime):
        input_kind = 'datetime'
    else:
        raise ValueError(
            'input timestamp must either string YYYY-MM-DD HH:MM:SS or a datetime object')

    if output_format is None:
        output_format = input_kind

    # NOTE(review): `tz` must already be in scope; presumably
    # `from dateutil import tz` -- it is not imported in the visible cells.
    utc_tz = tz.gettz('UTC')
    to_tz = tz.gettz(to_time_zone)
    if to_tz is None:
        # astimezone(None) would silently convert to the machine's local zone.
        raise ValueError('unknown time zone: {!r}'.format(to_time_zone))

    # Label the value as UTC, then shift it to the target zone.
    output = timestamp.replace(tzinfo=utc_tz).astimezone(to_tz)

    if output_format == 'datetime':
        return output
    if output_format == 'str':
        return output.strftime(default_format)
    return output.strftime(output_format)

### Timestamp

In [7]:
# EPOCH: time.time() returns the current time as a float number of seconds
# elapsed since the epoch (1970-01-01 00:00:00 UTC)
import time

time.time()

1565014626.918981

In [10]:
# Convert an epoch timestamp into a datetime object. Called without a tz
# argument, fromtimestamp() returns a naive datetime in the machine's local time.
from datetime import datetime
dtnow = datetime.fromtimestamp(time.time())
dtnow

datetime.datetime(2019, 8, 6, 0, 18, 18, 455068)

In [13]:
# Individual fields of a datetime are available as attributes
dtnow.year, dtnow.month, dtnow.hour

(2019, 8, 0)

### UTC

In [15]:
# GET THE CURRENT DATE AND TIME IN THE UTC TIME ZONE
# Passing timezone.utc makes the result timezone-aware (its tzinfo is set)
from datetime import datetime, timezone, timedelta

datetime.now(timezone.utc)

datetime.datetime(2019, 8, 5, 14, 19, 53, 799258, tzinfo=datetime.timezone.utc)

In [16]:
# 5 days before the current local time: adding a negative timedelta subtracts it
datetime.today() + timedelta(days=-5)

datetime.datetime(2019, 8, 1, 0, 20, 12, 154164)

## DATAFRAME

### Index

In [17]:
# SET MULTI-LEVEL INDEXING

import pandas as pd

purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

# One row per purchase, labelled by the store where it was made
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])

# Set multi-level index: the existing store labels plus the 'Name' column
df = df.set_index([df.index, 'Name'])

# Rename the index levels
df.index.names = ['Location', 'Name']

# Demonstrate adding a record.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# build a one-row frame carrying the same two-level index and concatenate it.
new_record = pd.DataFrame(
    {'Item Purchased': ['Kitty Food'], 'Cost': [3.00]},
    index=pd.MultiIndex.from_tuples([('Store 2', 'Kevyn')], names=['Location', 'Name']),
)
df = pd.concat([df, new_record])
df


Unnamed: 0_level_0,Unnamed: 1_level_0,Item Purchased,Cost
Location,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0
Store 2,Kevyn,Kitty Food,3.0


In [20]:
# Select a single row of the multi-indexed frame by giving one label per index level (Location, Name)
df.loc['Store 1','Chris']

Item Purchased    Dog Food
Cost                  22.5
Name: (Store 1, Chris), dtype: object

### Index of Maximum value in a column

In [None]:
# idxmax() returns the index label of the row holding the column's maximum value.
# NOTE(review): 'col' looks like a placeholder -- no frame built in the visible cells has a column with that name.
df['col'].idxmax()

### Method Chaining

https://tomaugspurger.github.io/method-chaining.html

https://www.datacamp.com/community/tutorials/pandas-idiomatic

https://towardsdatascience.com/the-unreasonable-effectiveness-of-method-chaining-in-pandas-15c2109e3c69

<p>Method chaining, where you call methods on an object one after another, is in vogue at the moment.
It's always been a style of programming that's been possible with pandas,
and over the past several releases, we've added methods that enable even more chaining.</p>

<ul>
<li><a href="http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.assign.html">assign</a> (0.16.0): For adding new columns to a DataFrame in a chain (inspired by dplyr's <code>mutate</code>)</li>
<li><a href="http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pipe.html">pipe</a> (0.16.2): For including user-defined methods in method chains.</li>
<li><a href="http://pandas.pydata.org/pandas-docs/version/0.18.0/whatsnew.html#changes-to-rename">rename</a> (0.18.0): For altering axis names (in addition to changing the actual labels as before).</li>
<li><a href="http://pandas.pydata.org/pandas-docs/version/0.18.0/whatsnew.html#window-functions-are-now-methods">Window methods</a> (0.18): Took the top-level <code>pd.rolling_*</code> and <code>pd.expanding_*</code> functions and made them <code>NDFrame</code> methods with a <code>groupby</code>-like API.</li>
<li><a href="http://pandas.pydata.org/pandas-docs/version/0.18.0/whatsnew.html#resample-api">Resample</a> (0.18.0) Added a new <code>groupby</code>-like API</li>
<li><a href="https://github.com/pydata/pandas/pull/12539">.where/mask/Indexers accept Callables</a> (0.18.1): In the next release you'll be able to pass a callable to the indexing methods, to be evaluated within the DataFrame's context (like <code>.query</code>, but with code instead of strings).</li>
</ul>
<p>My scripts will typically start off with a large-ish chain at the start getting things into a manageable state.
It's good to have the bulk of your munging done with right away so you can start to do Science™:</p>

In [None]:

# Names of the 3 states with the largest combined population of their 3 most populous rows.
# NOTE(review): census_df is not defined in the visible cells; it must be loaded beforehand.
(
census_df.query("SUMLEV ==50") # Keep rows where SUMLEV is 50 (presumably the county-level rows -- verify against the census data dictionary)
         .sort_values(['CENSUS2010POP'], ascending = False) # Sort the rows by 2010 population, largest first
         .groupby('STNAME') # Group by state name ...
         .head(3) # ... and keep the first 3 rows of each group, i.e. its 3 most populous
         .groupby('STNAME') # Group by state name again to total those rows per state
         .sum()
         .sort_values(['CENSUS2010POP'], ascending = False) # Order the states by that total, largest first
         .head(3) # Keep the top 3 states
         .index # Return the state names (STNAME is the index after the groupby/sum)
)