# Sets

An unordered collection of unique elements (a set has no positions, so it is not a sequence)

## Creating Sets

In [3]:
# A plain list that deliberately contains duplicates (2 and 3 each appear twice).
list_integers = [2, 3, 3, 6, 4, 2, 5, 1]

# Display the list: order and duplicates are preserved.
list_integers

[2, 3, 3, 6, 4, 2, 5, 1]

### Using `set()` - the argument you pass to it must be an iterable (a list, tuple, string, or any other collection of values)

In [4]:
# set() walks the iterable and keeps a single copy of each distinct value.
set_items = set(list_integers)

# Display the resulting set; the duplicate 2 and 3 are gone.
set_items

{1, 2, 3, 4, 5, 6}

### Empty curly braces `{}` create a dictionary, not a set - use `set()` to create an empty set

In [5]:
# Empty braces are the dict literal, so despite its name this is NOT a set;
# an empty set has to be written as set().
set_items_2 = {}

# Confirm the resulting type.
type(set_items_2)

dict

### Using `{}` - each element inside the curly braces must be hashable; duplicate values are kept only once

In [6]:
# Braces holding comma-separated values (no key: value pairs) form a set literal.
set_items_3 = {1, 2, 3, 4, 5}

# Show the set together with its type as a 2-tuple.
set_items_3, type(set_items_3)

({1, 2, 3, 4, 5}, set)

In [8]:
# Tuple of the integers 0 through 5.
tuple_of_integers = tuple(range(6))

tuple_of_integers

(0, 1, 2, 3, 4, 5)

In [9]:
# Braces store the tuple itself as ONE element (this tuple of integers is
# hashable); they do not unpack its items the way set(tuple_of_integers) would.
set_items_4 = {tuple_of_integers}

set_items_4

{(0, 1, 2, 3, 4, 5)}

In [12]:
# List of the integers 0 through 5 (the mutable counterpart of the tuple above).
list_of_integers_2 = list(range(6))

list_of_integers_2

[0, 1, 2, 3, 4, 5]

In [13]:
# Intentional error: a list is unhashable, so it cannot be stored as a set
# element. The literal raises TypeError, so set_items_5 is never bound and
# the display line below is not reached.
set_items_5 = {list_of_integers_2}

set_items_5

TypeError: unhashable type: 'list'

In [14]:
# Elements of different types can share one set; the repeated 2 and "Arcane"
# are stored only once. Note that "100" is a string, not a number.
set_items_6 = {1, 2, 2, 0.5, "Arcane", "100", "Arcane"}

# Show the de-duplicated set together with its type.
set_items_6, type(set_items_6)

({0.5, 1, '100', 2, 'Arcane'}, set)

---

## The difference between using `set()` and `{}` in creating sets

The set() function accepts iterables

`set()` accepts both immutable and mutable iterables

{} accepts values and they are placed into it as they are.

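`{}` accepts only hashable values (for example numbers, strings and tuples); unhashable values such as lists and dictionaries raise a `TypeError`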

In [15]:
# set() iterates over the list and collects its distinct items.
set(list_integers)

{1, 2, 3, 4, 5, 6}

In [16]:
# A set literal takes each value as written; repeated 5, 6 and 7 collapse to one each.
{1, 2, 4, 5, 6, 5, 5, 6, 7, 7}

{1, 2, 4, 5, 6, 7}

In [17]:
# Braces keep the string whole: the set has exactly one element.
{"Raspberry"}

{'Raspberry'}

In [18]:
# set() iterates over the string, so the elements are its distinct characters
# ('R' and 'r' are different characters; the repeated 'r' is stored once).
set("Raspberry")

{'R', 'a', 'b', 'e', 'p', 'r', 's', 'y'}

In [19]:
# set() accepts a (mutable) list because it only reads the items out of it.
set([1, 2, 3, 3, 4])

{1, 2, 3, 4}

In [20]:
# Intentional error: braces would store the list itself as an element, and a
# list is unhashable, so this raises TypeError.
{[1, 2, 3, 3, 4]}

TypeError: unhashable type: 'list'

In [21]:
# Two-entry mapping used by the next cells to contrast set(...) with {...}.
dictionary = dict(name="Kwan", age=15)

dictionary

{'name': 'Kwan', 'age': 15}

In [22]:
# Iterating a dict yields its keys, so set() collects the keys only.
set(dictionary)

{'age', 'name'}

In [23]:
# Intentional error: a dict is unhashable, so it cannot be a set element (TypeError).
{dictionary}

TypeError: unhashable type: 'dict'

### Membership Operators

In [24]:
# Re-display the set built from list_integers before testing membership.
set_items

{1, 2, 3, 4, 5, 6}

In [25]:
# `in` evaluates to True when the value is an element of the set.
2 in set_items

True

In [26]:
# `not in` is the negation: 3 is an element, so this evaluates to False.
3 not in set_items

False

## Performing Set Operations

### Using Union

In [27]:
# Left-hand operand for the union examples that follow.
set_items

{1, 2, 3, 4, 5, 6}

In [28]:
# Second operand for the union examples; it shares the value 6 with set_items.
# NOTE: this name is `set_item_5` (singular "item"), which differs from the
# `set_items_5` assignment that failed earlier in the notebook.
set_item_5 = {6, 7, 8, 9, 10}

set_item_5

{6, 7, 8, 9, 10}

In [29]:
# The | operator returns a new set containing every element of both sets;
# the shared value 6 appears once.
set_items | set_item_5

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

In [30]:
# Method form of the same union; it yields the same result as the | operator above.
set_items.union(set_item_5)

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

---

In [31]:
# Multiples of 5 from 5 up to 45, gathered with a set comprehension.
set_A = {multiple for multiple in range(5, 50, 5)}

set_A

{5, 10, 15, 20, 25, 30, 35, 40, 45}

In [32]:
# The | operator works when both operands are sets.
set_A | {2, 4, 6, 8}

{2, 4, 5, 6, 8, 10, 15, 20, 25, 30, 35, 40, 45}

In [33]:
# Intentional error: the | operator does not accept a list as the right-hand
# operand and raises TypeError.
set_A | [2, 4, 6, 8]

TypeError: unsupported operand type(s) for |: 'set' and 'list'

In [34]:
# Unlike the | operator, the .union() method accepts any iterable (a list here);
# repeated values in the argument are stored once.
set_A.union([2, 4, 6, 8, 8, 2, 9])

{2, 4, 5, 6, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45}

---

## Explore these operators

* intersection --- `&`

* symmetric_difference --- `^`

* difference --- `-`

* issubset --- `<=`

---

## Modifying a Set

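A set itself can be modified (elements can be added or removed), but every element contained in the set must be of a hashable - in practice immutable - type.

That is to say, an individual element cannot be changed in place; it can only be removed and replaced by a different value.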

In [35]:
# A list keeps insertion order and duplicates (2, 3 and 10 each appear twice).
list_of_student_scores = [1, 2, 3, 3, 4, 2, 5, 10, 10] # --- this is a list

list_of_student_scores

[1, 2, 3, 3, 4, 2, 5, 10, 10]

In [36]:
# Build a set from the list: only the distinct scores remain.
set_of_student_scores = set(list_of_student_scores)

set_of_student_scores

{1, 2, 3, 4, 5, 10}

### Item assignment test - a list supports assignment by index, a set does not (a set has no positions)

In [37]:
# Show the list before it is modified in the next cell.
list_of_student_scores

[1, 2, 3, 3, 4, 2, 5, 10, 10]

In [38]:
# Lists support assignment by index: the first score is replaced in place.
# The set built earlier from this list is a separate object and is unaffected.
list_of_student_scores[0] = 16

list_of_student_scores

[16, 2, 3, 3, 4, 2, 5, 10, 10]

In [42]:
# Intentional error: a set does not support item assignment, so this raises
# TypeError. The set can still be changed through methods such as update().
set_of_student_scores[0] = 16

TypeError: 'set' object does not support item assignment

## Augmented Assignment Operators and Methods

### Update

In [43]:
# update() adds the items of the iterable to the set in place;
# the repeated 25 and 20 in the argument are stored once.
set_of_student_scores.update([25, 20, 25, 20, 0, 30])

set_of_student_scores

{0, 1, 2, 3, 4, 5, 10, 20, 25, 30}

In [44]:
# |= is the operator form of update(): an in-place union with the set on the right.
# The float 0.5 sits alongside the integers without any problem.
set_of_student_scores |= {100, 40, 55, 40, 0.5}

set_of_student_scores

{0, 0.5, 1, 2, 3, 4, 5, 10, 20, 25, 30, 40, 55, 100}

## Explore these augmented assignment operators and find two more

intersection_update

## Explore These Set Methods

* add
* remove
* clear

## Use Case

In [45]:
import random

In [46]:
# Seed the generator so that "Restart Kernel -> Run All" reproduces the same list.
random.seed(42)

# range(10000, 160000) holds exactly 150000 integers and random.sample draws
# without replacement, so the result is a shuffled copy of the whole range:
# every ID occurs exactly once.
idproducer = random.sample(range(10000, 160000), 150000)

In [47]:
# random.sample returns a list, so duplicates would be possible in general.
type(idproducer)

list

In [48]:
# Distinct values of the list. NOTE: this displays a very large set; slice a
# sorted copy or use len() if only a preview or a count is needed.
set(idproducer) # plays the role of pandas.Series.unique()

{10000,
 10001,
 10002,
 10003,
 10004,
 10005,
 10006,
 10007,
 10008,
 10009,
 10010,
 10011,
 10012,
 10013,
 10014,
 10015,
 10016,
 10017,
 10018,
 10019,
 10020,
 10021,
 10022,
 10023,
 10024,
 10025,
 10026,
 10027,
 10028,
 10029,
 10030,
 10031,
 10032,
 10033,
 10034,
 10035,
 10036,
 10037,
 10038,
 10039,
 10040,
 10041,
 10042,
 10043,
 10044,
 10045,
 10046,
 10047,
 10048,
 10049,
 10050,
 10051,
 10052,
 10053,
 10054,
 10055,
 10056,
 10057,
 10058,
 10059,
 10060,
 10061,
 10062,
 10063,
 10064,
 10065,
 10066,
 10067,
 10068,
 10069,
 10070,
 10071,
 10072,
 10073,
 10074,
 10075,
 10076,
 10077,
 10078,
 10079,
 10080,
 10081,
 10082,
 10083,
 10084,
 10085,
 10086,
 10087,
 10088,
 10089,
 10090,
 10091,
 10092,
 10093,
 10094,
 10095,
 10096,
 10097,
 10098,
 10099,
 10100,
 10101,
 10102,
 10103,
 10104,
 10105,
 10106,
 10107,
 10108,
 10109,
 10110,
 10111,
 10112,
 10113,
 10114,
 10115,
 10116,
 10117,
 10118,
 10119,
 10120,
 10121,
 10122,
 10123,
 10124,


In [49]:
# Number of distinct values; it equals the list length here because the
# sample was drawn without replacement.
len(set(idproducer)) # plays the role of pandas.Series.nunique()

150000

In [50]:
import pandas as pd

In [51]:
# Good way: a path relative to the working directory keeps the notebook
# portable across machines.
relative_path = "./data/farm_data.csv"

# Bad way: an absolute path is tied to one user's machine and exposes the local
# username. It is kept only as a counter-example and is not used in the cells
# shown here.
absolute_path = r"C:\Users\mholdbrook01\Desktop\py-ecom\data\farm_data.csv"

In [52]:
# Load the CSV into a DataFrame using the relative path defined above.
farm_data = pd.read_csv(filepath_or_buffer=relative_path)

# Preview the first rows instead of displaying the whole frame.
farm_data.head()

Unnamed: 0,IDFARM,FARM_TOTAL_AREA_GPS,POINT_LATITUDE,POINT_LONGITUDE,POINT_DATE
0,88797,0.0,6.153048,-2.673048,07/01/2016 13:11:59
1,88797,0.0,6.152918,-2.672938,07/01/2016 13:11:40
2,88797,0.0,6.152764,-2.672745,07/01/2016 13:11:10
3,88797,0.0,6.15271,-2.672877,07/01/2016 13:10:54
4,88797,0.0,6.152379,-2.672898,07/01/2016 13:10:21


In [53]:
# Distinct farm IDs via a plain Python set: iterating the column yields its values.
set(farm_data["IDFARM"])

{390,
 808,
 6303,
 16576,
 40818,
 50728,
 74971,
 87548,
 88795,
 88797,
 90022,
 91501,
 92358,
 110567,
 112369,
 114311,
 114312,
 126120,
 126433,
 126601,
 127185,
 127693,
 127796,
 129601,
 129695,
 129699,
 132739,
 134820,
 140602,
 161910,
 188788,
 188790,
 203705,
 203723,
 203736,
 204311,
 204312,
 210080,
 213476,
 213488,
 213774,
 214141,
 220469,
 227135,
 227183,
 243301,
 243332,
 243524,
 249666,
 249686,
 262540,
 277981,
 284383,
 284386,
 284552,
 284554,
 284913,
 285358,
 285359,
 285827,
 286295,
 331832,
 332177,
 332194,
 346724,
 378192,
 379979,
 381034,
 381968,
 381985,
 382516,
 384336,
 384337,
 384338,
 384831,
 385223,
 390639,
 396429,
 396430,
 396431,
 396433,
 396434,
 396436,
 396438,
 396441,
 396446,
 396449,
 400806,
 401191,
 404523,
 409868,
 409870,
 410661,
 415305,
 440855,
 442586,
 459886,
 459894,
 460111,
 461513,
 485896,
 512966,
 525831,
 525839,
 568795,
 578641}

In [54]:
# pandas equivalent: returns the distinct values as a NumPy array, listed in
# order of first appearance rather than as a set.
farm_data["IDFARM"].unique()

array([ 88797,  88795,  90022, 127796, 127185, 126601, 379979, 126120,
       396441, 400806,  92358, 442586,    808,    390,  50728,  91501,
       390639, 332177, 214141, 126433, 460111,  74971, 114311, 114312,
       404523, 161910, 243332, 220469, 440855, 203705, 203723, 110567,
       284913, 461513, 415305, 285358, 512966, 284383,  16576, 227135,
       525839, 384831, 112369, 188788, 459894, 213488, 204312, 134820,
       286295, 285359, 132739, 140602, 396429, 396431, 396438, 285827,
       382516, 384338, 249686, 381985,  87548, 243524, 381968, 401191,
       188790, 346724, 213476, 381034, 284552,   6303, 203736, 204311,
       277981, 410661, 396433, 396434, 396436, 396449, 409870, 485896,
       249666, 384336, 525831, 378192, 129695, 213774,  40818, 284554,
       396430, 396446, 409868, 243301, 459886, 384337, 385223, 568795,
       331832, 284386, 210080, 578641, 127693, 129601, 129699, 227183,
       332194, 262540], dtype=int64)

In [55]:
# pandas count of distinct values in the column.
farm_data["IDFARM"].nunique()

106

In [56]:
# Same count computed with a plain Python set; it matches nunique() above.
len(set(farm_data["IDFARM"]))

106

In [None]:
# farm_data is of type pandas.DataFrame

# farm_data = pd.read_csv(filepath_or_buffer="path_of_csv_file/name_of_csv_file.csv")

# farm_data["idproducer"].unique()
# farm_data["idproducer"].nunique()

---