In [11]:
def get_allbud_urls(filename, columns):
    """
    Load a CSV file and return the values of one of its columns as a list.

    Parameters
    ----------
    filename: string or path-like
        Location of the CSV file. It is joined onto the current working
        directory, so a relative name is looked up there (an absolute path
        is used unchanged).
    columns: column label
        Label of the column whose values are returned, e.g. 'desc_url'.

    Returns
    -------
    list
        The values of the selected column, in row order.
    """
    # Imports stay local so the cell works on its own in a fresh kernel.
    import os
    import pandas as pd

    csv_location = os.path.join(os.getcwd(), filename)
    frame = pd.read_csv(csv_location)
    selected = frame[columns]
    return selected.tolist()

In [14]:
# Collect the description-page URLs from the 'desc_url' column of allbuds_1.csv.
# NOTE(review): this needs the file to already have a header row naming 'desc_url';
# the "allbuds_1.csv processing" cells further down write that header, so run them first.
get_allbud_urls('allbuds_1.csv', 'desc_url')

['https://www.allbud.com/marijuana-strains/indica-dominant-hybrid/banana-og',
 'https://www.allbud.com/marijuana-strains/indica-dominant-hybrid/cherry-pie',
 'https://www.allbud.com/marijuana-strains/indica-dominant-hybrid/blueberry-diesel',
 'https://www.allbud.com/marijuana-strains/indica/northern-lights',
 'https://www.allbud.com/marijuana-strains/hybrid/nerds',
 'https://www.allbud.com/marijuana-strains/sativa-dominant-hybrid/citrus-farmer',
 'https://www.allbud.com/marijuana-strains/sativa-dominant-hybrid/super-silver-blue-magoo',
 'https://www.allbud.com/marijuana-strains/hybrid/mac1',
 'https://www.allbud.com/marijuana-strains/sativa/jack-herer',
 'https://www.allbud.com/marijuana-strains/indica/white-rhino',
 'https://www.allbud.com/marijuana-strains/sativa-dominant-hybrid/laughing-buddha',
 'https://www.allbud.com/marijuana-strains/hybrid/godfather-og-kush',
 'https://www.allbud.com/marijuana-strains/sativa-dominant-hybrid/sour-diesel',
 'https://www.allbud.com/marijuana-strai

## Manual csv tools

### allbuds_1.csv processing (First Step Processing for Image and Description URLs)

In [2]:
import pandas as pd
import os


# Read allbuds_1.csv from the current working directory. header=None tells pandas
# the file has no header row, so the first line is kept as data and the columns
# receive integer labels (0, 1).
path = os.path.join(os.getcwd(), 'allbuds_1.csv')
data = pd.read_csv(path, header=None)
data.head()

Unnamed: 0,0,1
0,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
1,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
2,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
3,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
4,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/hybri...


In [3]:
# Replace the integer column labels with descriptive names: column 0 holds the
# image URLs and column 1 the description-page URLs (see the preview above).
# This relabels `data` in place, so the load cell must be re-run to get the
# integer labels back.
data.columns = ['image_url', 'desc_url']
data.head()

Unnamed: 0,image_url,desc_url
0,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
1,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
2,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
3,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/indic...
4,https://media.allbud.com/resized/350x242/media...,https://www.allbud.com/marijuana-strains/hybri...


In [4]:
# Write the renamed frame back over allbuds_1.csv in the working directory.
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default, and because the header-less input file is overwritten, the
# header=None load cell above will not reproduce the same frame on a re-run.
# Consider index=False and/or a separate output filename — confirm what
# downstream readers expect first.
data.to_csv('allbuds_1.csv')

### allbuds_strain_data.csv processing (First Step Processing for primary data file)

In [10]:
import pandas as pd
import os


# Read allbuds_strain_data.csv from the current working directory. header=None
# tells pandas the file has no header row, so the first line is kept as data and
# the columns receive integer labels (0..7).
path = os.path.join(os.getcwd(), 'allbuds_strain_data.csv')
data = pd.read_csv(path, header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,Sugar Cane,Sugar Cane is a rare slightly sativa dominant ...,Sativa Dominant Hybrid,40% Indica,60% Sativa,20.0,Earthy Sweet Candy Grape Spicy Fruity Herbal P...,Body High Cerebral Creative Energizing Relaxin...
1,MAC1,"MAC 1, also known as “Miracle Alien Cookies X1...",Hybrid,50% Indica,50% Sativa,21.5,Sweet Diesel Sour Spicy Herbal Pungent,Creative Euphoria Happy Motivation Relaxing Up...
2,Chemdawg,With a near-even balance between sativa and in...,Indica Dominant Hybrid,55% Indica,45% Sativa,19.0,Earthy Pungent Chemical Diesel Pine,Cerebral Creative Euphoria Happy Relaxing Cere...
3,Jack Herer,Jack Herer is easily one of the best-known str...,Sativa,,,23.0,Earthy Sweet Spicy Herbal Lemon Pine Woody,Body High Cerebral Creative Euphoria Happy Bod...
4,Nerds,"Nerds, also known as “Nerdz,” is an evenly bal...",Hybrid,50% Indica,50% Sativa,15.5,Earthy Sweet Grape Spicy Herbal Fruity Berry W...,Cerebral Creative Euphoria Focus Relaxing Cere...


In [12]:
# Show the current column labels (integers assigned by the header=None load)
# before they are replaced with descriptive names.
data.columns

Int64Index([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64')

In [15]:
# Give the eight positional columns descriptive names, in the same left-to-right
# order as the preview above (name, description, type, indica %, sativa %,
# THC %, flavors, effects). The list length must match the number of columns.
# This relabels `data` in place, so the load cell must be re-run to get the
# integer labels back.
data.columns = [
    'strain_name',
    'strain_desc',
    'strain_type',
    'strain_percent_indica',
    'strain_percent_sativa',
    'strain_thc_percent',
    'strain_flavors',
    'strain_effects',
               ]
data.head()

Unnamed: 0,strain_name,strain_desc,strain_type,strain_percent_indica,strain_percent_sativa,strain_thc_percent,strain_flavors,strain_effects
0,Sugar Cane,Sugar Cane is a rare slightly sativa dominant ...,Sativa Dominant Hybrid,40% Indica,60% Sativa,20.0,Earthy Sweet Candy Grape Spicy Fruity Herbal P...,Body High Cerebral Creative Energizing Relaxin...
1,MAC1,"MAC 1, also known as “Miracle Alien Cookies X1...",Hybrid,50% Indica,50% Sativa,21.5,Sweet Diesel Sour Spicy Herbal Pungent,Creative Euphoria Happy Motivation Relaxing Up...
2,Chemdawg,With a near-even balance between sativa and in...,Indica Dominant Hybrid,55% Indica,45% Sativa,19.0,Earthy Pungent Chemical Diesel Pine,Cerebral Creative Euphoria Happy Relaxing Cere...
3,Jack Herer,Jack Herer is easily one of the best-known str...,Sativa,,,23.0,Earthy Sweet Spicy Herbal Lemon Pine Woody,Body High Cerebral Creative Euphoria Happy Bod...
4,Nerds,"Nerds, also known as “Nerdz,” is an evenly bal...",Hybrid,50% Indica,50% Sativa,15.5,Earthy Sweet Grape Spicy Herbal Fruity Berry W...,Cerebral Creative Euphoria Focus Relaxing Cere...


In [16]:
# Write the renamed frame back over allbuds_strain_data.csv in the working directory.
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default, and because the header-less input file is overwritten, the
# header=None load cell above will not reproduce the same frame on a re-run.
# Consider index=False and/or a separate output filename — confirm what
# downstream readers expect first.
data.to_csv('allbuds_strain_data.csv')