# More Function Exercises

Generalize the Twitter language analysis of the previous exercise by adding a parameter for the column name, with a default value, so that the same function can count the entries of any column.

In [13]:
import pandas as pd

# Read 'tweets.csv' into a DataFrame, using its 'id' column as the row index
tweets_df = pd.read_csv('tweets.csv', index_col='id')
# Show the column labels that are available for counting
tweets_df.columns

Index(['contributors', 'coordinates', 'created_at', 'entities',
       'extended_entities', 'favorite_count', 'favorited', 'filter_level',
       'geo', 'id_str', 'in_reply_to_screen_name', 'in_reply_to_status_id',
       'in_reply_to_status_id_str', 'in_reply_to_user_id',
       'in_reply_to_user_id_str', 'is_quote_status', 'lang', 'place',
       'possibly_sensitive', 'quoted_status', 'quoted_status_id',
       'quoted_status_id_str', 'retweet_count', 'retweeted',
       'retweeted_status', 'source', 'text', 'timestamp_ms', 'truncated',
       'user'],
      dtype='object')

In [4]:
def count_entries(df, col_name='lang'):
    """Tally how often each distinct value appears in one column.

    Args:
        df: Object that can be indexed by column name (for example a
            pandas DataFrame) and whose selected column is iterable.
        col_name: Label of the column to tally. Defaults to 'lang'.

    Returns:
        dict mapping every value encountered in the column to the number
        of times it was seen; keys appear in order of first occurrence.
    """
    tally = {}

    # dict.get() yields 0 for a value not seen yet, so a single statement
    # handles both the first sighting and every later one.
    for value in df[col_name]:
        tally[value] = tally.get(value, 0) + 1

    return tally

# Call count_entries() with the default column ('lang'); the returned
# dictionary is shown as the cell output rather than stored in a variable.
count_entries(tweets_df)

{'en': 97, 'et': 1, 'und': 2}

In [5]:
# Override the default column name to count the entries of 'source' instead
count_entries(tweets_df, 'source')

{'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>': 24,
 '<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>': 1,
 '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>': 26,
 '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>': 33,
 '<a href="http://www.twitter.com" rel="nofollow">Twitter for BlackBerry</a>': 2,
 '<a href="http://www.google.com/" rel="nofollow">Google</a>': 2,
 '<a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iPad</a>': 6,
 '<a href="http://linkis.com" rel="nofollow">Linkis.com</a>': 2,
 '<a href="http://rutracker.org/forum/viewforum.php?f=93" rel="nofollow">newzlasz</a>': 2,
 '<a href="http://ifttt.com" rel="nofollow">IFTTT</a>': 1,
 '<a href="http://www.myplume.com/" rel="nofollow">PlumeÂ forÂ Android</a>': 1}

You're now going to generalize this function one step further by allowing the user to pass it a flexible argument, that is, in this case, as many column names as the user would like!

In [6]:
def count_entries(df, *args):
    """Tally value occurrences across any number of columns, combined.

    Args:
        df: Object that can be indexed by column name (for example a
            pandas DataFrame) and whose selected columns are iterable.
        *args: Column labels to tally. Values from every listed column
            are accumulated into the same dictionary.

    Returns:
        dict mapping each value encountered to its total count over all
        requested columns; empty when no column labels are passed.
    """
    combined = {}

    for label in args:
        # Look the column up outside the try block so that a missing
        # column is reported instead of being mistaken for a new value.
        values = df[label]

        for value in values:
            try:
                # Value already tallied: bump its count
                combined[value] += 1
            except KeyError:
                # First sighting of this value
                combined[value] = 1

    return combined

# Call count_entries() with a single column name; the returned dictionary
# is shown as the cell output rather than stored in a variable.
count_entries(tweets_df, 'lang')

{'en': 97, 'et': 1, 'und': 2}

In [8]:
# Pass two column names: the counts for 'lang' and 'source' end up merged
# in one dictionary.
count_entries(tweets_df, 'lang', 'source')

{'en': 97,
 'et': 1,
 'und': 2,
 '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>': 24,
 '<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>': 1,
 '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>': 26,
 '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>': 33,
 '<a href="http://www.twitter.com" rel="nofollow">Twitter for BlackBerry</a>': 2,
 '<a href="http://www.google.com/" rel="nofollow">Google</a>': 2,
 '<a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iPad</a>': 6,
 '<a href="http://linkis.com" rel="nofollow">Linkis.com</a>': 2,
 '<a href="http://rutracker.org/forum/viewforum.php?f=93" rel="nofollow">newzlasz</a>': 2,
 '<a href="http://ifttt.com" rel="nofollow">IFTTT</a>': 1,
 '<a href="http://www.myplume.com/" rel="nofollow">PlumeÂ forÂ Android</a>': 1}

In [16]:
def count_entries(df, *args):
    """Tally value occurrences separately for each requested column.

    Args:
        df: Object that can be indexed by column name (for example a
            pandas DataFrame) and whose selected columns are iterable.
        *args: Column labels to tally; each column is counted on its own.

    Returns:
        list containing one dict per label in ``args``, in the order the
        labels were given. Each dict maps a value found in that column to
        the number of times it occurs there. The list is empty when no
        column labels are passed.
    """

    def tally(values):
        # Build a fresh counts dictionary for one column's values.
        counts = {}
        for value in values:
            counts[value] = counts.get(value, 0) + 1
        return counts

    # One independent dictionary per column, kept in argument order
    return [tally(df[label]) for label in args]

# Call count_entries() with five column names; the result is a list holding
# one counts dictionary per column, in the order the names were passed.
count_entries(tweets_df, 'lang', 'source', 'id_str', 'place', 'retweet_count')

[{'en': 97, 'et': 1, 'und': 2},
 {'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>': 24,
  '<a href="http://www.facebook.com/twitter" rel="nofollow">Facebook</a>': 1,
  '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>': 26,
  '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>': 33,
  '<a href="http://www.twitter.com" rel="nofollow">Twitter for BlackBerry</a>': 2,
  '<a href="http://www.google.com/" rel="nofollow">Google</a>': 2,
  '<a href="http://twitter.com/#!/download/ipad" rel="nofollow">Twitter for iPad</a>': 6,
  '<a href="http://linkis.com" rel="nofollow">Linkis.com</a>': 2,
  '<a href="http://rutracker.org/forum/viewforum.php?f=93" rel="nofollow">newzlasz</a>': 2,
  '<a href="http://ifttt.com" rel="nofollow">IFTTT</a>': 1,
  '<a href="http://www.myplume.com/" rel="nofollow">PlumeÂ forÂ Android</a>': 1},
 {714960401759387648: 1,
  714960401977319424: 1,
  714960402426236928: 1,
  71496040236