# Big Data Midterm xl3139

In [30]:
import pyspark.sql.functions as F

from pyspark.ml import Pipeline
from pyspark.sql.window import Window
from pyspark.sql import SparkSession, DataFrame
from pyspark.ml.feature import RegexTokenizer, NGram, MinHashLSH, CountVectorizer, Tokenizer
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType

## Useful Methods

In [31]:
def show_dataframe(
    dataframe: DataFrame, num_rows_to_show: int = 10, prefix: str = "", suffix: str = ""
) -> None:
    prefix = prefix + ":\n" if prefix else prefix
    suffix = suffix + "\n" if suffix else suffix
    print(f"------------\n{prefix}")
    dataframe.show(num_rows_to_show)
    print(f"{suffix}------------\n")

## Q2. Spark - Language Models - in Spark

### Constants

In [32]:
# * constants
APP_NAME_Q2 = "Question2"
WORD_TEXT_FILE_PATH = "./data/*.txt"

READ_IN_DATA_COL = "value"
PRE_PROCESSED_TEXT_COL = "processed_text"
TOKENS_COL = "tokens"

BIGRMAS_LIST_COL = "bigrams_list"
TRIGRAMS_LIST_COL = "trigrams_list"

BIGRAM_COL = "bigram"
TRIGRAM_COL = "trigram"

BIGRAM_FROM_TRIGRAM_COL = "bigram_from_trigram"
BIGRAM_FROM_TRIGRAM_COUNT_COL = "bigram_from_trigram_count"
BIGRAM_COUNT_COL = "bigram_count"

CONDITIONAL_PROBABILITY_COL = "condational_probability"

### Read Data and Pre-processing

In [33]:
# * create the spark session
spark = SparkSession.builder.appName(APP_NAME_Q2).master("local[*]").getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

# * load the text data
df_text = spark.read.text(WORD_TEXT_FILE_PATH).filter(
    F.col(READ_IN_DATA_COL).isNotNull() & (F.col(READ_IN_DATA_COL) != "")
)
show_dataframe(df_text)

------------

+--------------------+
|               value|
+--------------------+
|@@31678741 <p> If...|
|@@31680641 <p> In...|
|@@31680841 <p> Th...|
|@@31682241 <h> Le...|
|@@31683241 <h> OE...|
|@@31683741 <h> Cl...|
|@@31684841 <p> Un...|
|@@31685141 <h> Co...|
|@@31686141 <h> Cl...|
|@@31688541 <h> Wi...|
+--------------------+
only showing top 10 rows

------------



24/11/08 18:45:31 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [34]:
# * pre-processing
# 1. remove all non-alphanumerical characters
# 2. remove all blank spaces at the beginning and tail
df_text_cleaned = df_text.select(
    F.trim(F.lower(F.regexp_replace(F.col(READ_IN_DATA_COL), "[^0-9a-zA-Z]+", " "))).alias(PRE_PROCESSED_TEXT_COL)
)
show_dataframe(df_text_cleaned, prefix="Processed text")

# * tokenize
tokenizer = RegexTokenizer(inputCol=PRE_PROCESSED_TEXT_COL, outputCol=TOKENS_COL, pattern="\\s+", gaps=True)
df_tokens = tokenizer.transform(df_text_cleaned).drop(PRE_PROCESSED_TEXT_COL)
show_dataframe(df_tokens, prefix="Tokens")

------------
Processed text:

+--------------------+
|      processed_text|
+--------------------+
|31678741 p if i p...|
|31680641 p in the...|
|31680841 p thousa...|
|31682241 h lesson...|
|31683241 h oecd w...|
|31683741 h closin...|
|31684841 p until ...|
|31685141 h corona...|
|31686141 h clorox...|
|31688541 h will c...|
+--------------------+
only showing top 10 rows

------------

------------
Tokens:

+--------------------+
|              tokens|
+--------------------+
|[31678741, p, if,...|
|[31680641, p, in,...|
|[31680841, p, tho...|
|[31682241, h, les...|
|[31683241, h, oec...|
|[31683741, h, clo...|
|[31684841, p, unt...|
|[31685141, h, cor...|
|[31686141, h, clo...|
|[31688541, h, wil...|
+--------------------+
only showing top 10 rows

------------



### Main Analysis

In [35]:
# * get the bigrams and trigrams
bigram = NGram(n=2, inputCol=TOKENS_COL, outputCol=BIGRMAS_LIST_COL)
trigram = NGram(n=3, inputCol=TOKENS_COL, outputCol=TRIGRAMS_LIST_COL)
df_bigrams = bigram.transform(df_tokens).drop(TOKENS_COL)
df_trigrams = trigram.transform(df_tokens).drop(TOKENS_COL)
show_dataframe(df_bigrams, prefix="Bigrams")
show_dataframe(df_trigrams, prefix="Trigrams")

------------
Bigrams:

+--------------------+
|        bigrams_list|
+--------------------+
|[31678741 p, p if...|
|[31680641 p, p in...|
|[31680841 p, p th...|
|[31682241 h, h le...|
|[31683241 h, h oe...|
|[31683741 h, h cl...|
|[31684841 p, p un...|
|[31685141 h, h co...|
|[31686141 h, h cl...|
|[31688541 h, h wi...|
+--------------------+
only showing top 10 rows

------------

------------
Trigrams:

+--------------------+
|       trigrams_list|
+--------------------+
|[31678741 p if, p...|
|[31680641 p in, p...|
|[31680841 p thous...|
|[31682241 h lesso...|
|[31683241 h oecd,...|
|[31683741 h closi...|
|[31684841 p until...|
|[31685141 h coron...|
|[31686141 h cloro...|
|[31688541 h will,...|
+--------------------+
only showing top 10 rows

------------



In [36]:
# * count the number of occurrences for each bigram and trigram
df_bigrams_count = (
    df_bigrams.select(F.explode(F.col(BIGRMAS_LIST_COL)).alias(BIGRAM_COL))
    .groupBy(BIGRAM_COL)
    .count()
)
df_trigrams_count = (
    df_trigrams.select(F.explode(F.col(TRIGRAMS_LIST_COL)).alias(TRIGRAM_COL))
    .groupBy(TRIGRAM_COL)
    .count()
)
show_dataframe(df_bigrams_count, prefix="Bigrams Count")
show_dataframe(df_trigrams_count, prefix="Trigrams Count")

# * top 10 trigrams
df_top_ten_trigrams = df_trigrams_count.orderBy(F.col("count").desc()).limit(10)
show_dataframe(df_top_ten_trigrams, prefix="Top 10 Trigrams")

# * split the trigram

------------
Bigrams Count:



                                                                                

+---------------+-----+
|         bigram|count|
+---------------+-----+
|       in south|  189|
|        for low|   14|
|    reserve the|   21|
|      access to|  524|
|        is more|  176|
|    emphasise p|    1|
|     the strain|   21|
|     center are|    2|
|  virus spreads|   34|
|potential every|    1|
+---------------+-----+
only showing top 10 rows

------------

------------
Trigrams Count:



                                                                                

+--------------------+-----+
|             trigram|count|
+--------------------+-----+
|     lot of promises|    1|
|    at tuesday night|    1|
|  selling a quixotic|    1|
|     one p klobuchar|    1|
|most clearly tuesday|    1|
|   visit london four|    1|
|qualifier against...|    1|
|heading household...|    1|
|their greater com...|    1|
|most heavily deva...|    1|
+--------------------+-----+
only showing top 10 rows

------------

------------
Top 10 Trigrams:





+------------------+-----+
|           trigram|count|
+------------------+-----+
|           lt p gt| 1928|
|      the covid 19| 1718|
|            do n t| 1662|
|       of covid 19| 1589|
|     the spread of| 1196|
|           p gt lt| 1037|
|     the number of| 1037|
|           gt lt p| 1023|
|        one of the|  953|
|of the coronavirus|  907|
+------------------+-----+

------------



                                                                                

In [37]:
df_bigram_from_trigram = (
    df_top_ten_trigrams.withColumn(TOKENS_COL, F.split(F.col(TRIGRAM_COL), " "))
    .withColumn(
        BIGRAM_FROM_TRIGRAM_COL, F.concat_ws(" ", F.col(TOKENS_COL)[0], F.col(TOKENS_COL)[1])
    )
    .withColumnRenamed("count", BIGRAM_FROM_TRIGRAM_COUNT_COL)
    .drop(TOKENS_COL)
)
show_dataframe(df_bigram_from_trigram, prefix="Bigram from Trigram")

------------
Bigram from Trigram:



[Stage 179:>                                                      (0 + 10) / 11]

+------------------+-------------------------+-------------------+
|           trigram|bigram_from_trigram_count|bigram_from_trigram|
+------------------+-------------------------+-------------------+
|           lt p gt|                     1928|               lt p|
|      the covid 19|                     1718|          the covid|
|            do n t|                     1662|               do n|
|       of covid 19|                     1589|           of covid|
|     the spread of|                     1196|         the spread|
|           p gt lt|                     1037|               p gt|
|     the number of|                     1037|         the number|
|           gt lt p|                     1023|              gt lt|
|        one of the|                      953|             one of|
|of the coronavirus|                      907|             of the|
+------------------+-------------------------+-------------------+

------------



                                                                                

In [38]:
# * join the dataframs to get both counts
df_joined = df_bigram_from_trigram.join(
    df_bigrams_count,
    df_bigram_from_trigram[BIGRAM_FROM_TRIGRAM_COL] == df_bigrams_count[BIGRAM_COL],
    "left",
).select(
    df_bigram_from_trigram[TRIGRAM_COL],
    df_bigram_from_trigram[BIGRAM_FROM_TRIGRAM_COL],
    df_bigram_from_trigram[BIGRAM_FROM_TRIGRAM_COUNT_COL],
    df_bigrams_count["count"].alias(BIGRAM_COUNT_COL),
)
show_dataframe(df_joined, prefix="Joined Dataframe")

------------
Joined Dataframe:



                                                                                

+------------------+-------------------+-------------------------+------------+
|           trigram|bigram_from_trigram|bigram_from_trigram_count|bigram_count|
+------------------+-------------------+-------------------------+------------+
|      the covid 19|          the covid|                     1718|        1760|
|            do n t|               do n|                     1662|        1662|
|     the spread of|         the spread|                     1196|        1306|
|       of covid 19|           of covid|                     1589|        1621|
|        one of the|             one of|                      953|        1491|
|           gt lt p|              gt lt|                     1023|        1792|
|           p gt lt|               p gt|                     1037|        1936|
|of the coronavirus|             of the|                      907|       17484|
|           lt p gt|               lt p|                     1928|        1930|
|     the number of|         the number|

In [39]:
# * calculate the conditional probability
df_result = df_joined.withColumn(
    CONDITIONAL_PROBABILITY_COL, F.col(BIGRAM_FROM_TRIGRAM_COUNT_COL) / F.col(BIGRAM_COUNT_COL)
)
show_dataframe(df_result, prefix="Conditional Probability")

------------
Conditional Probability:



                                                                                

+------------------+-------------------+-------------------------+------------+-----------------------+
|           trigram|bigram_from_trigram|bigram_from_trigram_count|bigram_count|condational_probability|
+------------------+-------------------+-------------------------+------------+-----------------------+
|      the covid 19|          the covid|                     1718|        1760|     0.9761363636363637|
|            do n t|               do n|                     1662|        1662|                    1.0|
|     the spread of|         the spread|                     1196|        1306|     0.9157733537519143|
|       of covid 19|           of covid|                     1589|        1621|     0.9802590993214065|
|        one of the|             one of|                      953|        1491|     0.6391683433936955|
|           gt lt p|              gt lt|                     1023|        1792|     0.5708705357142857|
|           p gt lt|               p gt|                     103

## Q3. Ranking over Partitions - in Spark

### Constants

In [40]:
APP_NAME_Q3 = "Question3"
BAKERY_CSV_FILE_PATH = "./shared/data/Bakery.csv"
DATE_COL = "Date"
TIME_COL = "Time"
TRANSACTION_COL = "Transaction"
ITEM_COL = "Item"
BAKERY_CSV_SCHEMA = StructType(
    [
        StructField(DATE_COL, DateType(), False),
        StructField(TIME_COL, StringType(), False),
        StructField(TRANSACTION_COL, IntegerType(), False),
        StructField(ITEM_COL, StringType(), False),
    ]
)

DAYPART_COL = "Daypart"
MORNING = "morning"
NOON = "noon"
AFTERNOON = "afternoon"
EVENING = "evening"

SALES_COL = "Items Sold"

RANK_COL = "Ranking"

TOP_THREE_ITEMS_COL = "Top 3 Items"

### Read Data & Pre-processing

In [41]:
spark = SparkSession.builder.appName(APP_NAME_Q3).master("local[*]").getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

df_bakery = spark.read.csv(BAKERY_CSV_FILE_PATH, header=True, schema=BAKERY_CSV_SCHEMA)
show_dataframe(df_bakery, prefix="Bakery")

------------
Bakery:

+----------+--------+-----------+-------------+
|      Date|    Time|Transaction|         Item|
+----------+--------+-----------+-------------+
|2016-10-30|09:58:11|          1|        Bread|
|2016-10-30|10:05:34|          2| Scandinavian|
|2016-10-30|10:05:34|          2| Scandinavian|
|2016-10-30|10:07:57|          3|Hot chocolate|
|2016-10-30|10:07:57|          3|          Jam|
|2016-10-30|10:07:57|          3|      Cookies|
|2016-10-30|10:08:41|          4|       Muffin|
|2016-10-30|10:13:03|          5|       Coffee|
|2016-10-30|10:13:03|          5|       Pastry|
|2016-10-30|10:13:03|          5|        Bread|
+----------+--------+-----------+-------------+
only showing top 10 rows

------------



### Main Analysis

In [42]:
# * compute column Daypart based on time
df_bakery = df_bakery.withColumn(
    DAYPART_COL,
    F.when((F.hour(TIME_COL) >= 6) & (F.hour(TIME_COL) < 11), MORNING)
    .when((F.hour(TIME_COL) >= 11) & (F.hour(TIME_COL) < 14), NOON)
    .when((F.hour(TIME_COL) >= 14) & (F.hour(TIME_COL) < 17), AFTERNOON)
    .otherwise(EVENING),
)

# * count number of items sold per daypart
df_daypart_sales = (
    df_bakery.groupBy(DAYPART_COL, ITEM_COL).count().withColumnRenamed("count", SALES_COL)
)
show_dataframe(df_daypart_sales, prefix="Number of items sold by daypart")

------------
Number of items sold by daypart:

+---------+--------------------+----------+
|  Daypart|                Item|Items Sold|
+---------+--------------------+----------+
|     noon|        Bare Popcorn|         1|
|     noon|    My-5 Fruit Shoot|         7|
|  morning|      Jammie Dodgers|        22|
|     noon|    Christmas common|         5|
|  evening|            Focaccia|         3|
|  morning|          Chocolates|         2|
|     noon|Drinking chocolat...|         2|
|afternoon|           Empanadas|         3|
|afternoon|Cherry me Dried f...|         1|
|afternoon|                Cake|       480|
+---------+--------------------+----------+
only showing top 10 rows

------------



In [43]:
daypart_window = Window.partitionBy(DAYPART_COL).orderBy(F.col(SALES_COL).desc())

df_ranked_daypart_sales = df_daypart_sales.withColumn(RANK_COL, F.rank().over(daypart_window))
show_dataframe(df_ranked_daypart_sales, prefix="Sales Ranking by Daypart")

df_top_three_items = df_ranked_daypart_sales.filter(F.col(RANK_COL) <= 3)
show_dataframe(df_top_three_items, prefix="Top 3 Items Sold by Daypart", num_rows_to_show=12)

------------
Sales Ranking by Daypart:

+---------+-------------+----------+-------+
|  Daypart|         Item|Items Sold|Ranking|
+---------+-------------+----------+-------+
|afternoon|       Coffee|      1476|      1|
|afternoon|        Bread|       847|      2|
|afternoon|          Tea|       566|      3|
|afternoon|         Cake|       480|      4|
|afternoon|     Sandwich|       275|      5|
|afternoon|Hot chocolate|       228|      6|
|afternoon|      Cookies|       185|      7|
|afternoon|    Alfajores|       167|      8|
|afternoon|      Brownie|       150|      9|
|afternoon|         NONE|       141|     10|
+---------+-------------+----------+-------+
only showing top 10 rows

------------

------------
Top 3 Items Sold by Daypart:

+---------+------+----------+-------+
|  Daypart|  Item|Items Sold|Ranking|
+---------+------+----------+-------+
|afternoon|Coffee|      1476|      1|
|afternoon| Bread|       847|      2|
|afternoon|   Tea|       566|      3|
|  evening|Coffee| 

In [44]:
df_result = df_top_three_items.groupBy(DAYPART_COL).agg(
    F.concat_ws(", ", F.collect_list(F.col(ITEM_COL))).alias(TOP_THREE_ITEMS_COL)
)
show_dataframe(df_result, prefix="Top 3 Items Sold by Daypart Aggregated")

------------
Top 3 Items Sold by Daypart Aggregated:

+---------+--------------------+
|  Daypart|         Top 3 Items|
+---------+--------------------+
|afternoon|  Coffee, Bread, Tea|
|  evening|  Coffee, Bread, Tea|
|  morning|Coffee, Bread, Pa...|
|     noon|  Coffee, Bread, Tea|
+---------+--------------------+

------------



## Q4. Duplicate Detection with Minhash

**Credits**

Shout out to [this explanation](https://www.pinecone.io/learn/series/faiss/locality-sensitive-hashing/) on Minhash/LSH (Local Sensitive Hashing) Algorithm, which greatly helped me understand the concept.

### Constants

In [45]:
APP_NAME_Q4 = "Question4"
HUFFPOST_JSON_FILE_PATH = "./shared/data/Huffpost.json"
BASE_NEWS_ITEM_SHORT_DESCRIPTION = (
    "Kitten Born With Twisted Arms And Legs Finds A Mom Who Knows She\u2019s Perfect"
)

SHORT_DESC_COL = "short_description"
TOKENS_COL = "tokens"
VECTOR_COL = "vectors"

MINHASH_COL = "minhash"
NUM_HASHTABLES = 5

JACCARD_SIM_COL = "jaccard_similarity"

### Read Data & Pre-processing

In [46]:
spark = SparkSession.builder.appName(APP_NAME_Q4).master("local[*]").getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

df_huffpost = spark.read.json(HUFFPOST_JSON_FILE_PATH)
show_dataframe(df_huffpost, prefix="Huffpost")

------------
Huffpost:

+--------------------+--------------+----------+--------------------+--------------------+--------------------+
|             authors|      category|      date|            headline|                link|   short_description|
+--------------------+--------------+----------+--------------------+--------------------+--------------------+
|Carla K. Johnson, AP|     U.S. NEWS|2022-09-23|Over 4 Million Am...|https://www.huffp...|Health experts sa...|
|      Mary Papenfuss|     U.S. NEWS|2022-09-23|American Airlines...|https://www.huffp...|He was subdued by...|
|       Elyse Wanshel|        COMEDY|2022-09-23|23 Of The Funnies...|https://www.huffp...|"Until you have a...|
|    Caroline Bologna|     PARENTING|2022-09-23|The Funniest Twee...|https://www.huffp...|"Accidentally put...|
|      Nina Golgowski|     U.S. NEWS|2022-09-22|Woman Who Called ...|https://www.huffp...|Amy Cooper accuse...|
|                    |     U.S. NEWS|2022-09-22|Cleaner Was Dead ...|https://www

In [None]:
tokenizer = RegexTokenizer(inputCol=SHORT_DESC_COL, outputCol=TOKENS_COL, pattern="\\s+", gaps=True)
vectorizer = CountVectorizer(inputCol=TOKENS_COL, outputCol=VECTOR_COL)

# Build the pipeline
df_huffpost_tokenized = tokenizer.transform(df_huffpost).filter(F.size(F.col(TOKENS_COL)) > 0)
model_vectorizer = vectorizer.fit(df_huffpost_tokenized)
df_vectorized = model_vectorizer.transform(df_huffpost_tokenized)
show_dataframe(df_vectorized, prefix="Vectorized Huffposts")
print(sorted(model_vectorizer.vocabulary)[:10])

df_base_desc = spark.createDataFrame([(BASE_NEWS_ITEM_SHORT_DESCRIPTION,)], [SHORT_DESC_COL])
df_base_desc_tokenized = tokenizer.transform(df_base_desc)
df_base_desc_vectorized = model_vectorizer.transform(df_base_desc_tokenized)
show_dataframe(df_base_desc_vectorized, prefix="Vectorized Base Description")

                                                                                

------------
Vectorized Huffposts:

+--------------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+
|             authors|      category|      date|            headline|                link|   short_description|              tokens|             vectors|
+--------------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+
|Carla K. Johnson, AP|     U.S. NEWS|2022-09-23|Over 4 Million Am...|https://www.huffp...|Health experts sa...|[health, experts,...|(191855,[0,1,3,6,...|
|      Mary Papenfuss|     U.S. NEWS|2022-09-23|American Airlines...|https://www.huffp...|He was subdued by...|[he, was, subdued...|(191855,[0,1,3,4,...|
|       Elyse Wanshel|        COMEDY|2022-09-23|23 Of The Funnies...|https://www.huffp...|"Until you have a...|["until, you, hav...|(191855,[2,11,15,...|
|    Caroline Bologna|     PARENTING|202

### Main Analysis

In [52]:
minhash_lsh = MinHashLSH(inputCol=VECTOR_COL, outputCol=MINHASH_COL, numHashTables=NUM_HASHTABLES)
model_minhash = minhash_lsh.fit(df_vectorized)
df_minhashed = model_minhash.transform(df_vectorized)
show_dataframe(df_minhashed, prefix="Minhashed Huffpost")

df_base_minhashed = model_minhash.transform(df_base_desc_vectorized)
show_dataframe(df_base_minhashed, prefix="Minhashed Base Description")

------------
Minhashed Huffpost:

+--------------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|             authors|      category|      date|            headline|                link|   short_description|              tokens|             vectors|             minhash|
+--------------------+--------------+----------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|Carla K. Johnson, AP|     U.S. NEWS|2022-09-23|Over 4 Million Am...|https://www.huffp...|Health experts sa...|[health, experts,...|(191855,[0,1,3,6,...|[[1.7908219E7], [...|
|      Mary Papenfuss|     U.S. NEWS|2022-09-23|American Airlines...|https://www.huffp...|He was subdued by...|[he, was, subdued...|(191855,[0,1,3,4,...|[[1.7908219E7], [...|
|       Elyse Wanshel|        COMEDY|2022-09-23|23 Of The Funnies...|https://www.huffp...|"

                                                                                

+--------------------+--------------------+--------------------+--------------------+
|   short_description|              tokens|             vectors|             minhash|
+--------------------+--------------------+--------------------+--------------------+
|Kitten Born With ...|[kitten, born, wi...|(191855,[2,4,12,4...|[[2.63406707E8], ...|
+--------------------+--------------------+--------------------+--------------------+

------------



In [53]:
df_result = model_minhash.approxSimilarityJoin(
    df_base_minhashed, df_minhashed, float("inf"), distCol=JACCARD_SIM_COL
)

df_top_five = df_result.select(
    f"datasetA.{SHORT_DESC_COL}", f"datasetB.{SHORT_DESC_COL}", JACCARD_SIM_COL
).orderBy(JACCARD_SIM_COL).filter(
    f"datasetA.{SHORT_DESC_COL} != datasetB.{SHORT_DESC_COL}"
).limit(5)

show_dataframe(df_top_five, prefix="Top 5 Similar Items (Duplicate Removed)")

------------
Top 5 Similar Items (Duplicate Removed):



[Stage 318:>                                                      (0 + 10) / 11]

+--------------------+--------------------+------------------+
|   short_description|   short_description|jaccard_similarity|
+--------------------+--------------------+------------------+
|Kitten Born With ...|A cookie with the...| 0.782608695652174|
|Kitten Born With ...|With a back flip ...|0.8235294117647058|
|Kitten Born With ...|The stories of an...|0.8260869565217391|
|Kitten Born With ...|Who needs a scrip...|0.8333333333333334|
|Kitten Born With ...|Traveling as a te...|0.8461538461538461|
+--------------------+--------------------+------------------+

------------



                                                                                