In [408]:
def plot_main_info(lim, usg, sum_size):
    plt.text(0, 1.4, "Disk Analytics", ha='center', wrap=True, fontsize=70)

    free = (lim - usg) / 1024 / 1024
    other = (usg - sum_size) / 1024 / 1024
    main_info = [free, sum_size, other]

    my_circle=plt.Circle( (0,0), 0.6, color='white')
    patches, texts = plt.pie(main_info, labels=names, wedgeprops = { 'linewidth' : 10, 'edgecolor' : 'white' })
    [txt.set_fontsize(20) for txt in texts]
    p=plt.gcf()
    p.gca().add_artist(my_circle)
    
    if usg >= 3 * lim / 4:
        advice_1 = "Your disk is almost full."
    elif usg >= lim / 2:
        advice_1 = "Your disk is okay."
    else:
        advice_1 = "The disk is almost free."
    
    plt.text(0, -1.5, advice_1, ha='center', wrap=True, fontsize=40)
    plt.savefig('main_info.png', bbox_inches='tight')

 #################   
def plot_top_folders(data, n=10):
    plt.figure(figsize=(12,12))
    folders = data[data['type'] == "folder"]
    top_folders = folders.sort_values(by='sum_size', ascending=False).iloc[1:n + 1]
    children = {}
    for folder in top_folders.index:
        children[folder] = data[data['parent'] == folder]
 

    heights = [top_folders.loc[folder, 'sum_size'] / 1024 / 1024 / len(children[folder]) for folder in top_folders.index]
    bars = top_folders['name']
 
    width = [len(children[folder]) for folder in top_folders.index]
    y_pos = [1]
    for i in range(1, len(width)):
        y_pos.append(y_pos[i - 1] + width[i - 1] / 2 + width[i] / 2 + 30)

    # Make the plot
    plt.bar(y_pos, heights, width=width)
    plt.xticks(y_pos, bars, fontsize=15, rotation=30)
    plt.text(140, 380, 'The higher bar - the more heavy files it contains.', ha='left', wrap=True)
    plt.text(140, 360, 'The broader - the more files it contains.', ha='left', wrap=True)
    plt.text(140, 320, 'Firstfull you should look at the highest ones - you can', ha='left', wrap=True)
    plt.text(170, 300, 'free a lot of space moving only few file from the Drive.', ha='left', wrap=True)
    plt.text(210, 430, 'Most heavy folders.', ha='left', wrap=True, fontsize = 50)

    plt.savefig('top_folders.jpeg')


In [416]:
"""
Scans your Google Drive and analyses files there.

"""
from __future__ import print_function
from handlers import DriveHandler
import matplotlib.pyplot as plt
import re
plt.figure(figsize=(12,12))

credentials = '../secret/client_secret.json'

if __name__ == "__main__":
    print('------------------------------------------')
    print('-------------|Drive Analyzer|-------------')
    print('------------------------------------------')
    email = 'baraltiva@gmail.com'#input("Enter your email adress: ")
    print('------------------------------------------')
    gd_handler = DriveHandler(credentials, email)

    data = gd_handler.scan_drive()
    sum_size = data['sum_size'].max() / 1024 / 1024
    lim, usg = gd_handler.get_drive_info()

    free = (lim - usg) / 1024 / 1024
    other = (usg - sum_size) / 1024 / 1024
    main_info = [free, sum_size, other]
    names = ["Free\n{0:.3f}MB\n".format(free), "in Drive\n{0:.3f}MB\n".format(sum_size), "Other\n{0:.3f}MB\n".format(other)]

    print()
    for name in names:
        print("{0}".format(name))
    print('------------------------------------------')

    
    plt.style.use('seaborn-white')
    plot_main_info(lim, usg, sum_size)
    plot_top_folders(data)
    plot_disc_types(data)
    

------------------------------------------
-------------|Drive Analyzer|-------------
------------------------------------------
------------------------------------------
Getting credentials...
Connecting...
Succesfully authentificated!


Limit: 15360.0


Free
2263.384MB

in Drive
7639.853MB

Other
13096.608MB

------------------------------------------


In [415]:
def calc_size(typ, data, is_sub=False):
    if is_sub:
        key = data['mimeType'].apply(lambda x: x.find(typ) != -1)
        return data[key]['size'].sum()
    else:
        key = data['type'].apply(lambda x: x.find(typ) != -1)
        return data[key]['size'].sum()

def plot_disc_types(data):
    
    keywords = {
            'folder' : ['folder'],
            'video' : ['webm', 'video', 'quicktime'],
            'image' : ['jpeg', 'gif', 'bmp'],
            'doc' : ['text', 'plain', 'document', 'djvu', 'pdf', 'msword']}

    group_names=data['type'].unique()[1:]
    gr = pd.DataFrame({'size' : [calc_size(t, data) for t in group_names], 'types' : group_names})


    subgroup_names=[]
    for l in keywords.values():
        subgroup_names += l
    subgroup_names = subgroup_names[1:]
    sgr = pd.DataFrame({'size' : [calc_size(t, data, True) for t in subgroup_names], 'types' : subgroup_names})

    top = sgr.sort_values(by='size', ascending=False).iloc[:5]

    # Create colors
    a, b, c, d=[plt.cm.Set1, plt.cm.Blues, plt.cm.Greens, plt.cm.Reds]

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.axis('equal')
    mypie, texts = ax.pie(gr['size'].values, radius=0.8, labels=group_names, colors=[a(0.6), b(0.6), c(0.6), d(0.6)] )
    [txt.set_fontsize(20) for txt in texts]

    plt.setp( mypie, width=0.3, edgecolor='white')
    plt.text(0, 1.1, 'Types and how much do they take.', ha='center', wrap=True, fontsize=25)
 
    plt.savefig('disc_types.png', bbox_inches='tight')