In [3]:
import nltk

In [10]:
! pip install nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


True

In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import nltk
from nltk import bigrams
from collections import Counter
import string
from nltk.corpus import stopwords
import networkx as nx

# Define functions for additional features

import nltk
from collections import Counter
from nltk.util import bigrams
import string
import networkx as nx
from nltk.corpus import stopwords

def phrase_patterns(text):
    tokens = text.split()
    if len(tokens) < 2:
        return []  # Return empty list for texts with less than two words
    bigram_counts = Counter(bigrams(tokens))
    significant_collocations = [bigram for bigram, count in bigram_counts.items() if count > 1]  # Example threshold for significance
    return significant_collocations

def sentence_length(text):
    sentences = nltk.sent_tokenize(text)
    avg_sentence_length = sum(len(sentence.split()) for sentence in sentences) / len(sentences)
    return avg_sentence_length / 100  # Normalize between 0 and 1

def punctuation_frequency(text):
    punctuation_counts = Counter(char for char in text if char in string.punctuation)
    total_punctuation = sum(punctuation_counts.values())
    punctuation_distribution = {punct: count / total_punctuation for punct, count in punctuation_counts.items()}
    return punctuation_distribution

def pos_tag_frequency(text):
    tokens = nltk.word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    pos_tag_counts = Counter(tag for word, tag in pos_tags)
    total_pos_tags = sum(pos_tag_counts.values())
    pos_tag_distribution = {tag: count / total_pos_tags for tag, count in pos_tag_counts.items()}
    return pos_tag_distribution

def function_words(text):
    stop_words = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(text)
    function_words_text = [word for word in tokens if word.lower() in stop_words]
    return function_words_text

def ngram_transition_graph(text, n=2):
    tokens = nltk.word_tokenize(text)
    ngrams = list(nltk.ngrams(tokens, n))
    transition_graph = nx.DiGraph()
    transition_graph.add_nodes_from(ngrams)
    for i in range(len(ngrams) - 1):
        transition_graph.add_edge(ngrams[i], ngrams[i + 1])
    return transition_graph

def ngram_transition_graph_similarity(graph1, graph2):
    nodes_graph1 = set(graph1.nodes)
    nodes_graph2 = set(graph2.nodes)
    intersection = nodes_graph1.intersection(nodes_graph2)
    union = nodes_graph1.union(nodes_graph2)
    return len(intersection) / len(union)

def type_token_ratio(text):
    tokens = nltk.word_tokenize(text)
    unique_tokens = set(tokens)
    return len(unique_tokens) / len(tokens)

def voice_detection(text):
    sentences = nltk.sent_tokenize(text)
    active_count = 0
    passive_count = 0
    for sentence in sentences:
        if 'is' in sentence.split() or 'are' in sentence.split():
            passive_count += 1
        else:
            active_count += 1
    total_sentences = len(sentences)
    if total_sentences == 0:
        return 0
    return active_count / total_sentences  # Normalize between 0 and 1

# Sample data (replace with your dataset)
texts_positive = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
    """I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
    """I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
    """Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
    """MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
    """fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
    """make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
    """well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
    """I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
    """Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
    """The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
    """Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
    """Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
    """Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
    """ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
    """recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
texts_negative = ["""						
        As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
    """						
        In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
    """,
    """						
        They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  

    """    ,    """						
    I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
    """ , """						
    I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 

    """ , """						
    So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
    """ , """						
    Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.

    """ , """						
    I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
    """ , """						
    One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
    """ , """						
    Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
    """ , """						
    So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 

    """ , """						
    Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 

    """ , """						
    I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
    ]

# Define texts_positive, texts_negative, and other necessary variables here.
# Create empty lists to store extracted features and class labels
feature_dicts = []

# Iterate through each text to extract features
for text in texts_positive + texts_negative:
    features = {}

    # Tokenize the text
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts([text])
    sequences = tokenizer.texts_to_sequences([text])

    # Pad sequences to make them of equal length
    max_sequence_length = max([len(seq) for seq in sequences])
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')  # Padding sequences using pad_sequences

    # Convert to NumPy arrays
    padded_sequences = np.array(padded_sequences)

    # Extract additional features
    features['sentence_length'] = sentence_length(text)
    punctuation_freq = punctuation_frequency(text)
    pos_tag_freq = pos_tag_frequency(text)
    features['punctuation_frequency'] = {key: punctuation_freq.get(key, 0) for key in string.punctuation}
    
    # Obtain all possible POS tags from NLTK
    pos_tags = set([tag for _, tag in nltk.pos_tag(nltk.word_tokenize(text))])
    features['pos_tag_frequency'] = {key: pos_tag_freq.get(key, 0) for key in pos_tags}
    
    features['phrase_patterns'] = phrase_patterns(text)
    features['type_token_ratio'] = type_token_ratio(text)
    features['voice_detection'] = voice_detection(text)
    
    # Add class label
    if text in texts_positive:
        features['author_a'] = 1  # Positive class (Author A)
    else:
        features['author_a'] = 0  # Negative class (other authors)

    feature_dicts.append(features)

# Create DataFrame from the list of feature dictionaries
text_features_df = pd.DataFrame(feature_dicts)

# Display the DataFrame
print(text_features_df)


    sentence_length                              punctuation_frequency  \
0          0.157143  {'!': 0.0625, '"': 0, '#': 0, '$': 0, '%': 0, ...   
1          0.213333  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
2          0.123333  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
3          0.085000  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
4          0.120000  {'!': 0.125, '"': 0, '#': 0, '$': 0, '%': 0, '...   
5          0.133000  {'!': 0.041666666666666664, '"': 0.08333333333...   
6          0.710000  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
7          0.118462  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
8          0.111429  {'!': 0, '"': 0, '#': 0, '$': 0, '%': 0, '&': ...   
9          0.166000  {'!': 0, '"': 0.06666666666666667, '#': 0, '$'...   
10         0.156667  {'!': 0.058823529411764705, '"': 0, '#': 0, '$...   
11         0.138750  {'!': 0.0625, '"': 0.0625, '#': 0, '$': 0, '%'...   
12         0.190909  {'!': 0.027777777

In [7]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder

# Initialize preprocessors
scaler = MinMaxScaler()
encoder = OneHotEncoder()

# Scale numerical features
numerical_features = text_features_df[['sentence_length', 'type_token_ratio']]
scaled_numerical_features = scaler.fit_transform(numerical_features)

# Encode categorical feature
categorical_feature = text_features_df['voice_detection']
encoded_categorical_feature = LabelEncoder().fit_transform(categorical_feature)

# Combine preprocessed features
preprocessed_features = np.concatenate([scaled_numerical_features, encoded_categorical_feature.reshape(-1, 1)], axis=1)

# Extract target labels
labels = text_features_df['author_a'].values



# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(preprocessed_features, labels, test_size=0.2, random_state=42)



In [13]:
import numpy as np
from keras.models import Sequential
from keras.layers import GRU, Dense, Embedding, Dropout
from sklearn.model_selection import train_test_split

# Define your GRU model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length))
model.add(GRU(units=64, return_sequences=True))
model.add(Dropout(0.5))
model.add(GRU(units=32))
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(preprocessed_features, labels, test_size=0.2, random_state=42)

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')


NameError: name 'vocab_size' is not defined

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout

# Reshape input data to include timestep dimension
X_train_reshaped = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Define the GRU model with the correct input shape
model = Sequential([
    GRU(units=128, dropout=0.2, recurrent_dropout=0.2, input_shape=(1, X_train.shape[1])),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Reshape X_test to match the input shape expected by the model
X_test_reshaped = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Evaluate the model
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Accuracy:", accuracy)



Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_4 (GRU)                 (None, 128)               51072     
                                                                 
 dense_8 (Dense)             (None, 64)                8256      
                                                                 
 dropout_4 (Dropout)         (None, 64)                0         
                                                                 
 dense_9 (Dense)             (None, 1)                 65        
                                                                 
Total params: 59393 (232.00 KB)
Trainable params: 59393 (232.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10


ValueError: in user code:

    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_4" is incompatible with the layer: expected shape=(None, 1, 3), found shape=(None, 3)


In [11]:
print("Shape of X_train before reshaping:", X_train.shape)


Shape of X_train before reshaping: (23, 3)


In [26]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenize text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text data to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to ensure uniform length
max_sequence_length = 100  # Adjust as needed
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Ensure that the input shape is compatible with the GRU layer
input_shape = (X_train.shape[1],)  # Shape of a single input sample (sequence length)


NameError: name 'texts' is not defined

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from sklearn.model_selection import train_test_split

# Extract features and target labels
X = text_features_df.drop(columns=['author_a'])
y = text_features_df['author_a']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the GRU model
model = Sequential([
    GRU(units=128, dropout=0.2, recurrent_dropout=0.2, input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)


ValueError: Input 0 of layer "gru_1" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 6)

In [5]:
!pip install pyspellchecker

Collecting pyspellchecker
  Obtaining dependency information for pyspellchecker from https://files.pythonhosted.org/packages/e1/d2/c7e3b3a61a34b9320399fa731d1f9f0c73db8a1f28c6764e9e11efa68a29/pyspellchecker-0.8.1-py3-none-any.whl.metadata
  Downloading pyspellchecker-0.8.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.1-py3-none-any.whl (6.8 MB)
   ---------------------------------------- 0.0/6.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/6.8 MB 187.9 kB/s eta 0:00:36
   ---------------------------------------- 0.0/6.8 MB 187.9 kB/s eta 0:00:36
   ---------------------------------------- 0.1/6.8 MB 302.7 kB/s eta 0:00:23
   ---------------------------------------- 0.1/6.8 M

In [1]:
import string
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import matplotlib.pyplot as plt

def get_function_words(text):
    # Define a set of common English function words
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    # Tokenize and lowercase the words in the text
    words = word_tokenize(text.lower())

    # Filter out function words
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0  # To handle the case when both graphs are empty

    return len(intersection) / len(union)

def calculate_similarity(text1, text2):
    try:
        # Tokenize sentences
        sentences1 = sent_tokenize(text1)
        sentences2 = sent_tokenize(text2)

        # Tokenize words and get part-of-speech tags
        words1 = [pos_tag(word_tokenize(sentence)) for sentence in sentences1]
        words2 = [pos_tag(word_tokenize(sentence)) for sentence in sentences2]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled1 = set([spell.correction(word) for sentence in words1 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])
        misspelled2 = set([spell.correction(word) for sentence in words2 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # Calculate similarity based on sentence structure, spelling, and punctuation frequency
        structure_similarity = len(sentences1) / len(sentences2)
        spelling_similarity = len(misspelled1.intersection(misspelled2)) / len(misspelled1.union(misspelled2))

        # POS Tagging Similarity
        pos_tags_similarity = pos_tag_similarity(words1, words2)

        # Sentence Length Similarity
        length_similarity = sentence_length_similarity(sentences1, sentences2)

        # Punctuation Frequency Similarity
        punctuation_sim = punctuation_similarity(text1, text2)

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph1 = generate_ngram_transition_graph(text1, n_value)
        graph2 = generate_ngram_transition_graph(text2, n_value)

        # Compute Jaccard similarity between the n-gram word transition graphs
        ngram_similarity = compute_jaccard_similarity(graph1, graph2)
        
        # Extract function words
        function_words1 = get_function_words(text1)
        function_words2 = get_function_words(text2)

        # Additional features: Function word counts or presence/absence
        function_word_count_similarity = len(set(function_words1).intersection(function_words2)) / len(set(function_words1).union(function_words2))

        return structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity,ngram_similarity

    except Exception as e:
        print(f"Error in calculate_similarity: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def sentence_length_similarity(sentences1, sentences2):
    avg_len1 = sum(len(sentence) for sentence in sentences1) / len(sentences1)
    avg_len2 = sum(len(sentence) for sentence in sentences2) / len(sentences2)

    return min(avg_len1, avg_len2) / max(avg_len1, avg_len2)

def punctuation_similarity(text1, text2):
    punctuation1 = [char for char in text1 if char in string.punctuation]
    punctuation2 = [char for char in text2 if char in string.punctuation]

    common_punctuations = set(punctuation1).intersection(punctuation2)
    total_punctuations = set(punctuation1).union(punctuation2)

    return len(common_punctuations) / len(total_punctuations)

def main():
    text1 = "This is a sample text. It checks for similarity based on sentence structure, spelling, and punctuation."
    text2 = "This is another sample text. It checks for similarity based on sentence structure, spelling, and punctuation."

    result = calculate_similarity(text1, text2)

    if result is not None:
        structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity,ngram_similarity = result
        print(f"Sentence Structure Similarity: {structure_similarity}")
        print(f"Spelling Similarity: {spelling_similarity}")
        print(f"POS Tag Similarity: {pos_tags_similarity}")
        print(f"Sentence Length Similarity: {length_similarity}")
        print(f"Punctuation Similarity: {punctuation_sim}")
        print(f"N-Gram Word Transition Graph Similarity: {ngram_similarity}")
        print(f"Function Word Count Similarity: {function_word_count_similarity}")
    else:
        print("Error in calculate_similarity. Please check the error message above.")

if __name__ == "__main__":
    main()


Error in calculate_similarity: name 'bigrams' is not defined
Error in calculate_similarity. Please check the error message above.


Traceback (most recent call last):
  File "C:\Users\pc\AppData\Local\Temp\ipykernel_4856\2196734618.py", line 77, in calculate_similarity
    graph1 = generate_ngram_transition_graph(text1, n_value)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\pc\AppData\Local\Temp\ipykernel_4856\2196734618.py", line 27, in generate_ngram_transition_graph
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
                  ^^^^^^^
NameError: name 'bigrams' is not defined


In [22]:
pip install scikit-learn scipy




In [8]:
import string
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import matplotlib.pyplot as plt
from nltk.util import bigrams  
from scipy import spatial
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.tree import Tree

def get_function_words(text):
    # Define a set of common English function words
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    # Tokenize and lowercase the words in the text
    words = word_tokenize(text.lower())

    # Filter out function words
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0  # To handle the case when both graphs are empty

    return len(intersection) / len(union)

def calculate_ttr(text):
    # Tokenize the text into words
    words = word_tokenize(text.lower())

    # Count the number of unique words (types) and total number of words (tokens)
    unique_words = set(words)
    num_tokens = len(words)

    # Calculate Type-Token Ratio (TTR)
    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0  # Handle the case when the text is empty

    return ttr

def calculate_similarity(text1, text2):
    try:
        # Tokenize sentences
        sentences1 = sent_tokenize(text1)
        sentences2 = sent_tokenize(text2)
       
        # Tokenize words and get part-of-speech tags
        words1 = [pos_tag(word_tokenize(sentence)) for sentence in sentences1]
        words2 = [pos_tag(word_tokenize(sentence)) for sentence in sentences2]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled1 = set([spell.correction(word) for sentence in words1 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])
        misspelled2 = set([spell.correction(word) for sentence in words2 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # Calculate similarity based on sentence structure, spelling, and punctuation frequency
        structure_similarity = len(sentences1) / len(sentences2)
        spelling_similarity = len(misspelled1.intersection(misspelled2)) / len(misspelled1.union(misspelled2))

        # POS Tagging Similarity
        pos_tags_similarity = pos_tag_similarity(words1, words2)

        # Sentence Length Similarity
        length_similarity = sentence_length_similarity(sentences1, sentences2)

        # Punctuation Frequency Similarity
        punctuation_sim = punctuation_similarity(text1, text2)

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph1 = generate_ngram_transition_graph(text1, n_value)
        graph2 = generate_ngram_transition_graph(text2, n_value)

        # Compute Jaccard similarity between the n-gram word transition graphs
        ngram_similarity = compute_jaccard_similarity(graph1, graph2)
        
        # Extract function words
        function_words1 = get_function_words(text1)
        function_words2 = get_function_words(text2)

        # Additional features: Function word counts or presence/absence
        function_word_count_similarity = len(set(function_words1).intersection(function_words2)) / len(set(function_words1).union(function_words2))

        # Apply Latent Semantic Analysis (LSA)
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])

        svd = TruncatedSVD(n_components=2)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # LSA Similarity
        lsa_similarity = 1 - spatial.distance.cosine(lsa_matrix[0], lsa_matrix[1])

        # Calculate TTR for each text
        ttr1 = calculate_ttr(text1)
        ttr2 = calculate_ttr(text2)
    
        return structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity, ngram_similarity, lsa_similarity, ttr1, ttr2

    except Exception as e:
        print(f"Error in calculate_similarity: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def sentence_length_similarity(sentences1, sentences2):
    avg_len1 = sum(len(sentence) for sentence in sentences1) / len(sentences1)
    avg_len2 = sum(len(sentence) for sentence in sentences2) / len(sentences2)

    return min(avg_len1, avg_len2) / max(avg_len1, avg_len2)

def punctuation_similarity(text1, text2):
    punctuation1 = [char for char in text1 if char in string.punctuation]
    punctuation2 = [char for char in text2 if char in string.punctuation]

    common_punctuations = set(punctuation1).intersection(punctuation2)
    total_punctuations = set(punctuation1).union(punctuation2)

    return len(common_punctuations) / len(total_punctuations)

def main():
    text1 = "This is a sample text. It checks for similarity based on sentence structure, spelling, and punctuation."
    text2 = "This is another sample text. It checks for similarity based on sentence structure, spelling, and punctuation."

    result = calculate_similarity(text1, text2)

    if result is not None:
        structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity, ngram_similarity, lsa_similarity, ttr1, ttr2 = result
        print(f"Sentence Structure Similarity: {structure_similarity}")
        print(f"Spelling Similarity: {spelling_similarity}")
        print(f"POS Tag Similarity: {pos_tags_similarity}")
        print(f"Sentence Length Similarity: {length_similarity}")
        print(f"Punctuation Similarity: {punctuation_sim}")
        print(f"N-Gram Word Transition Graph Similarity: {ngram_similarity}")
        print(f"Function Word Count Similarity: {function_word_count_similarity}")
        print(f"LSA Similarity: {lsa_similarity}")
        print(f"TTR for text 1: {ttr1}")
        print(f"TTR for text 2: {ttr2}")
    else:
        print("Error in calculate_similarity. Please check the error message above.")

if __name__ == "__main__":
    main()


Sentence Structure Similarity: 1.0
Spelling Similarity: 0.9
POS Tag Similarity: 0.9
Sentence Length Similarity: 0.9444444444444444
Punctuation Similarity: 1.0
N-Gram Word Transition Graph Similarity: 0.8947368421052632
Function Word Count Similarity: 0.8
LSA Similarity: 1
TTR for text 1: 0.9
TTR for text 2: 0.9


In [4]:
import string
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import matplotlib.pyplot as plt
from nltk.util import bigrams  
from scipy import spatial
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.tree import Tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

def get_function_words(text):
    # Define a set of common English function words
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    # Tokenize and lowercase the words in the text
    words = word_tokenize(text.lower())

    # Filter out function words
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0  # To handle the case when both graphs are empty

    return len(intersection) / len(union)

def calculate_ttr(text):
    # Tokenize the text into words
    words = word_tokenize(text.lower())

    # Count the number of unique words (types) and total number of words (tokens)
    unique_words = set(words)
    num_tokens = len(words)

    # Calculate Type-Token Ratio (TTR)
    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0  # Handle the case when the text is empty

    return ttr

def calculate_similarity(text1, text2):
    try:
        # Tokenize sentences
        sentences1 = sent_tokenize(text1)
        sentences2 = sent_tokenize(text2)
       
        # Tokenize words and get part-of-speech tags
        words1 = [pos_tag(word_tokenize(sentence)) for sentence in sentences1]
        words2 = [pos_tag(word_tokenize(sentence)) for sentence in sentences2]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled1 = set([spell.correction(word) for sentence in words1 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])
        misspelled2 = set([spell.correction(word) for sentence in words2 for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # Calculate similarity based on sentence structure, spelling, and punctuation frequency
        structure_similarity = len(sentences1) / len(sentences2)
        spelling_similarity = len(misspelled1.intersection(misspelled2)) / len(misspelled1.union(misspelled2))

        # POS Tagging Similarity
        pos_tags_similarity = pos_tag_similarity(words1, words2)

        # Sentence Length Similarity
        length_similarity = sentence_length_similarity(sentences1, sentences2)

        # Punctuation Frequency Similarity
        punctuation_sim = punctuation_similarity(text1, text2)

        # Generate n-gram transition graphs
        n_value = 2  # adjust the n-gram size
        graph1 = generate_ngram_transition_graph(text1, n_value)
        graph2 = generate_ngram_transition_graph(text2, n_value)

        # Compute Jaccard similarity between the n-gram word transition graphs
        ngram_similarity = compute_jaccard_similarity(graph1, graph2)
        
        # Extract function words
        function_words1 = get_function_words(text1)
        function_words2 = get_function_words(text2)

        # Additional features: Function word counts or presence/absence
        function_word_count_similarity = len(set(function_words1).intersection(function_words2)) / len(set(function_words1).union(function_words2))

        # Apply Latent Semantic Analysis (LSA)
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])

        svd = TruncatedSVD(n_components=2)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # LSA Similarity
        lsa_similarity = 1 - spatial.distance.cosine(lsa_matrix[0], lsa_matrix[1])

        # Calculate TTR for each text
        ttr1 = calculate_ttr(text1)
        ttr2 = calculate_ttr(text2)
    
        return structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity, ngram_similarity, lsa_similarity, ttr1, ttr2

    except Exception as e:
        print(f"Error in calculate_similarity: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def sentence_length_similarity(sentences1, sentences2):
    avg_len1 = sum(len(sentence) for sentence in sentences1) / len(sentences1)
    avg_len2 = sum(len(sentence) for sentence in sentences2) / len(sentences2)

    return min(avg_len1, avg_len2) / max(avg_len1, avg_len2)

def punctuation_similarity(text1, text2):
    punctuation1 = [char for char in text1 if char in string.punctuation]
    punctuation2 = [char for char in text2 if char in string.punctuation]

    common_punctuations = set(punctuation1).intersection(punctuation2)
    total_punctuations = set(punctuation1).union(punctuation2)

    return len(common_punctuations) / len(total_punctuations)

def train_random_forest_model(X_train, y_train):
    # Instantiate Random Forest Regressor
    rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)  # Adjust hyperparameters as needed

    # Train the model
    rf_regressor.fit(X_train, y_train)

    return rf_regressor

def main():
    # Example data (replace with your actual data)
    texts = [
    "This is a sample text. It checks for similarity based on sentence structure, spelling, and punctuation.",
    "This is another sample text. It checks for similarity based on sentence structure, spelling, and punctuation.",
    "Adding another text sample here for testing purposes.",
    "One more text sample to increase the dataset size."
    ]

    X = []
    similarity_scores = []

    # Calculate similarity features for each pair of texts
    for i in range(len(texts)):
        for j in range(i + 1, len(texts)):
            result = calculate_similarity(texts[i], texts[j])
            if result is not None:
                X.append(result)
                # Assuming the first element of the result tuple is the similarity score
                similarity_scores.append(result[0])
    print("Shape of X:", len(X))
    print("Shape of similarity_scores:", len(similarity_scores))

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, similarity_scores, test_size=0.2, random_state=42)

    # Train Random Forest model
    rf_model = train_random_forest_model(X_train, y_train)

    # Predict on test set
    y_pred = rf_model.predict(X_test)

    # Evaluate model
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

if __name__ == "__main__":
    main()


  self.explained_variance_ratio_ = exp_var / full_var


Shape of X: 6
Shape of similarity_scores: 6
Mean Squared Error: 0.1184


In [3]:
import string
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import matplotlib.pyplot as plt
from nltk.util import bigrams  
from scipy import spatial
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.tree import Tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


def get_function_words(text):
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    words = word_tokenize(text.lower())
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0

    return len(intersection) / len(union)

def calculate_ttr(text):
    words = word_tokenize(text.lower())
    unique_words = set(words)
    num_tokens = len(words)

    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0

    return ttr

def calculate_similarity(text1, text2, author1):
    try:
        sentences1 = sent_tokenize(text1)
        sentences2 = sent_tokenize(text2)

        structure_similarity = len(sentences1) / len(sentences2)

        spell = SpellChecker()
        misspelled1 = set([spell.correction(word) for sentence in word_tokenize(text1) for (word, tag) in pos_tag(word_tokenize(sentence)) if tag.startswith('N') or tag.startswith('V')])
        misspelled2 = set([spell.correction(word) for sentence in word_tokenize(text2) for (word, tag) in pos_tag(word_tokenize(sentence)) if tag.startswith('N') or tag.startswith('V')])
        spelling_similarity = len(misspelled1.intersection(misspelled2)) / len(misspelled1.union(misspelled2))

        pos_tags_similarity = pos_tag_similarity([pos_tag(word_tokenize(sentence)) for sentence in sentences1], [pos_tag(word_tokenize(sentence)) for sentence in sentences2])
        length_similarity = sentence_length_similarity(sentences1, sentences2)
        punctuation_sim = punctuation_similarity(text1, text2)

        n_value = 2
        graph1 = generate_ngram_transition_graph(text1, n_value)
        graph2 = generate_ngram_transition_graph(text2, n_value)
        ngram_similarity = compute_jaccard_similarity(graph1, graph2)

        function_words1 = get_function_words(text1)
        function_words2 = get_function_words(text2)
        function_word_count_similarity = len(set(function_words1).intersection(function_words2)) / len(set(function_words1).union(function_words2))

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])
        svd = TruncatedSVD(n_components=2)
        lsa_matrix = svd.fit_transform(tfidf_matrix)
        lsa_similarity = 1 - spatial.distance.cosine(lsa_matrix[0], lsa_matrix[1])

        ttr1 = calculate_ttr(text1)
        ttr2 = calculate_ttr(text2)
        
        # Determine if the text belongs to Author A (1) or not (0)
        if author1 in text1:
            label = 1
        else:
            label = 0

        return [structure_similarity, spelling_similarity, pos_tags_similarity, length_similarity, punctuation_sim, function_word_count_similarity, ngram_similarity, lsa_similarity, ttr1, ttr2], label

    except Exception as e:
        print(f"Error in calculate_similarity: {e}")
        traceback.print_exc()
        return None, None

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def sentence_length_similarity(sentences1, sentences2):
    avg_len1 = sum(len(sentence) for sentence in sentences1) / len(sentences1)
    avg_len2 = sum(len(sentence) for sentence in sentences2) / len(sentences2)

    return min(avg_len1, avg_len2) / max(avg_len1, avg_len2)

def punctuation_similarity(text1, text2):
    punctuation1 = [char for char in text1 if char in string.punctuation]
    punctuation2 = [char for char in text2 if char in string.punctuation]

    common_punctuations = set(punctuation1).intersection(punctuation2)
    total_punctuations = set(punctuation1).union(punctuation2)

    return len(common_punctuations) / len(total_punctuations)

def train_random_forest_model(X_train, y_train):
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train, y_train)
    return rf_classifier

def main():
    texts = [
    "This is a sample text. It checks for similarity based on sentence structure, spelling, and punctuation.",
    "This is another sample text. It checks for similarity based on sentence structure, spelling, and punctuation.",
    "Adding another text sample here for testing purposes.",
    "One more text sample to increase the dataset size."
    ]

    X = []
    y = []

    author1 = "AuthorA"

    for i in range(len(texts)):
        for j in range(i + 1, len(texts)):
            result, label = calculate_similarity(texts[i], texts[j], author1)
            if result is not None:
                X.append(result)
                y.append(label)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    rf_model = train_random_forest_model(X_train, y_train)

    y_pred = rf_model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    for i, pred in enumerate(y_pred):
        print(f"Text {i+1} author prediction:", "AuthorA" if pred == 1 else "Not AuthorA")

if __name__ == "__main__":
    main()


  self.explained_variance_ratio_ = exp_var / full_var


Accuracy: 1.0
Text 1 author prediction: Not AuthorA
Text 2 author prediction: Not AuthorA


In [11]:
import string
import os
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.util import bigrams  
from scipy import spatial
from nltk.tree import Tree

def extract_features(text):
    try:
        # Skip empty texts
        if not text.strip():
            return None

        # Tokenize sentences
        sentences = sent_tokenize(text)
       
        # Initialize structure similarity to a default value
        structure_similarity = 0.0
        
        if sentences:  # Check if sentences list is not empty
            # Calculate similarity based on sentence structure
            structure_similarity = len(sentences) / len(sentences)

        # Tokenize words and get part-of-speech tags
        words = [pos_tag(word_tokenize(sentence)) for sentence in sentences]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled = set([spell.correction(word) for sentence in words for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # POS Tagging Similarity
        pos_tags_similarity = pos_tag_similarity(words, words)

        # Sentence Length Similarity
        length_similarity = 1.0

        # Punctuation Frequency Similarity
        punctuation_sim = 1.0

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph = generate_ngram_transition_graph(text, n_value)

        # Additional features: Function word counts or presence/absence
        function_words = get_function_words(text)

        # Apply Latent Semantic Analysis (LSA)
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text])

        svd = TruncatedSVD(n_components=2)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # Calculate TTR for each text
        ttr = calculate_ttr(text)
    
        return [structure_similarity, pos_tags_similarity, length_similarity, punctuation_sim, len(misspelled), len(set(function_words)), len(graph.nodes), ttr]

    except Exception as e:
        print(f"Error in extract_features: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None
    
# Function to load and preprocess data
def load_data(author_a_folder, not_author_a_folder):
    author_a_texts = []
    not_author_a_texts = []
    
    for file_name in os.listdir(author_a_folder):
        file_path = os.path.join(author_a_folder, file_name)
        with open(file_path, 'r', encoding='utf-8') as f:
            author_a_texts.append(f.read())

    for file_name in os.listdir(not_author_a_folder):
        file_path = os.path.join(not_author_a_folder, file_name)
        with open(file_path, 'r', encoding='utf-8') as f:
            not_author_a_texts.append(f.read())

    # Label author_a texts as 1 and not_author_a texts as 0
    X = author_a_texts + not_author_a_texts
    y = [1] * len(author_a_texts) + [0] * len(not_author_a_texts)

    return X, y



def main(author_a_folder, not_author_a_folder):
    # Load and preprocess data
    X, y = load_data(author_a_folder, not_author_a_folder)

    # Extract features from the texts
    X_features = [extract_features(text) for text in X]

    # Remove None values
    X_features = [x for x in X_features if x is not None]

    # Convert feature list to numpy array
    X_features = np.array(X_features)
    y = np.array(y)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

    # Train a random forest classifier
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Predict on test set
    y_pred = clf.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    author_a_folder = "Author A"
    not_author_a_folder = "Not Author A"
    main(author_a_folder, not_author_a_folder)


UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 191: invalid start byte

In [17]:
import string
import os
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.util import bigrams  
from scipy import spatial
from nltk.tree import Tree
from sklearn.utils import shuffle


def calculate_sentence_length_similarity(sentences):
    if len(sentences) == 0:
        return 0.0

    total_length = sum(len(sentence) for sentence in sentences)
    average_length = total_length / len(sentences)

    max_length = max(len(sentence) for sentence in sentences)
    min_length = min(len(sentence) for sentence in sentences)

    if max_length == min_length:
        return 0.0

    normalized_average_length = (average_length - min_length) / (max_length - min_length)

    return normalized_average_length

def calculate_punctuation_similarity(text):
    # Define the set of punctuation marks
    punctuation_marks = set(string.punctuation)

    # Count punctuation marks in the text
    punctuation_counts = {punctuation_mark: text.count(punctuation_mark) for punctuation_mark in punctuation_marks}

    # Compute punctuation similarity as the sum of squared differences between punctuation frequencies
    total_marks = sum(punctuation_counts.values())
    punctuation_frequencies = {mark: count / total_marks for mark, count in punctuation_counts.items()}

    # Compute similarity using squared Euclidean distance between punctuation distributions
    punctuation_sim = 0.0
    for mark in punctuation_marks:
        punctuation_sim += (punctuation_frequencies.get(mark, 0.0) - 1.0 / len(punctuation_marks)) ** 2

    punctuation_sim = 1.0 - punctuation_sim  # Normalize to [0, 1]
    
    return punctuation_sim

def extract_features(text):
    try:
        # Skip empty texts
        if not text.strip():
            return None

        # Tokenize sentences
        sentences = sent_tokenize(text)
       
        # Initialize structure similarity to a default value
        structure_similarity = 0.0
        
        if sentences:  # Check if sentences list is not empty
            # Calculate similarity based on sentence structure
            structure_similarity = len(sentences) / len(sentences)

        # Tokenize words and get part-of-speech tags
        words = [pos_tag(word_tokenize(sentence)) for sentence in sentences]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled = set([spell.correction(word) for sentence in words for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # POS Tagging Similarity
        pos_tags_similarity = pos_tag_similarity(words, words)

        # Calculate punctuation similarity
        punctuation_sim = calculate_punctuation_similarity(text)

        # Calculate sentence length similarity
        length_similarity = calculate_sentence_length_similarity(sentences)

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph = generate_ngram_transition_graph(text, n_value)

        # Additional features: Function word counts or presence/absence
        function_words = get_function_words(text)

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text])

        svd = TruncatedSVD(n_components=4)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # Calculate TTR for each text
        ttr = calculate_ttr(text)

        return [structure_similarity, pos_tags_similarity, length_similarity, punctuation_sim, len(misspelled), len(set(function_words)), len(graph.nodes), ttr] + list(lsa_matrix.flatten())
    
    except Exception as e:
        print(f"Error in extract_features: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None
    
# Function to load and preprocess data
def load_data(author_a_texts, not_author_a_texts):
    # Label author_a texts as 1 and not_author_a texts as 0
    X = author_a_texts + not_author_a_texts
    y = [1] * len(author_a_texts) + [0] * len(not_author_a_texts)

    return X, y

def main(author_a_texts, not_author_a_texts):
    # Load and preprocess data
    X, y = load_data(author_a_texts, not_author_a_texts)

    # Extract features from the texts
    X_features = [extract_features(text) for text in X]

    # Remove None values
    X_features = [x for x in X_features if x is not None]

    # Convert feature list to numpy array
    X_features = np.array(X_features)
    y = np.array(y)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=40)

    # Train a random forest classifier (new stuff)
    clf = RandomForestClassifier(n_estimators=100, random_state=40)
    clf.fit(X_train, y_train)

    # Predict on test set
    y_pred = clf.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    author_a_texts = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
    """I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
    """I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
    """Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
    """MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
    """fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
    """make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
    """well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
    """I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
    """Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
    """The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
    """Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
    """Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
    """Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
    """ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
    """recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
    not_author_a_texts = ["""						
        As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
    """						
        In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
    """,
    """						
        They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  

    """    ,    """						
    I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
    """ , """						
    I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 

    """ , """						
    So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
    """ , """						
    Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.

    """ , """						
    I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
    """ , """						
    One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
    """ , """						
    Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
    """ , """						
    So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 

    """ , """						
    Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 

    """ , """						
    I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
    ]  # Provide a list of texts not from author A
    main(author_a_texts, not_author_a_texts)


  self.explained_variance_ratio_ = exp_var / full_var


Accuracy: 0.6666666666666666


In [4]:
import string
import os
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.util import bigrams  
from scipy import spatial
from nltk.tree import Tree
from sklearn.utils import shuffle
from collections import Counter
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
import re

def extract_phrase_patterns(text):
    # Tokenize the text into words
    words = nltk.word_tokenize(text)
    
    # Create a bigram collocation finder
    bigram_finder = BigramCollocationFinder.from_words(words)
    
    # Apply a statistical measure to identify significant collocations
    scored_collocations = bigram_finder.score_ngrams(BigramAssocMeasures.chi_sq)
    
    # Extract collocations that meet a certain threshold
    significant_collocations = [bigram for bigram, score in scored_collocations if score >  3.0]
    
    return significant_collocations

def calculate_punctuation_similarity(text):
    # Define the set of punctuation marks
    punctuation_marks = set(string.punctuation)

    # Count punctuation marks in the text
    punctuation_counts = {punctuation_mark: text.count(punctuation_mark) for punctuation_mark in punctuation_marks}

    # Compute punctuation similarity as the sum of squared differences between punctuation frequencies
    total_marks = sum(punctuation_counts.values())
    punctuation_frequencies = {mark: count / total_marks for mark, count in punctuation_counts.items()}

    # Compute similarity using squared Euclidean distance between punctuation distributions
    punctuation_sim = 0.0
    for mark in punctuation_marks:
        punctuation_sim += (punctuation_frequencies.get(mark, 0.0) - 1.0 / len(punctuation_marks)) ** 2

    punctuation_sim = 1.0 - punctuation_sim  # Normalize to [0, 1]
    
    return punctuation_sim

def calculate_sentence_length_similarity(sentences):
    if len(sentences) == 0:
        return 0.0

    total_length = sum(len(sentence) for sentence in sentences)
    average_length = total_length / len(sentences)

    max_length = max(len(sentence) for sentence in sentences)
    min_length = min(len(sentence) for sentence in sentences)

    if max_length == min_length:
        return 0.0

    normalized_average_length = (average_length - min_length) / (max_length - min_length)

    return normalized_average_length

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def get_function_words(text):
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    words = word_tokenize(text.lower())
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0

    return len(intersection) / len(union)

def calculate_ttr(text):
    words = word_tokenize(text.lower())
    unique_words = set(words)
    num_tokens = len(words)

    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0

    return ttr

def detect_voice(sentence):
    """
    Detects the voice (active or passive) of a given sentence.

    Parameters:
        sentence (str): The input sentence.

    Returns:
        str: The detected voice ('active' or 'passive').
    """
    # Tokenize the sentence into words and get part-of-speech tags
    words = nltk.word_tokenize(sentence)
    tagged_words = nltk.pos_tag(words)

    # Check for passive voice indicators
    passive_indicators = ['is', 'am', 'are', 'was', 'were', 'been', 'being', 'be', 'by']

    # Check if the sentence contains any passive voice indicators
    if any(tagged_word[0].lower() in passive_indicators for tagged_word in tagged_words):
        return 'passive'
    else:
        return 'active'


from sklearn.preprocessing import StandardScaler

def extract_features(text):
    try:
        # Skip empty texts
        if not text.strip():
            return None

        # Tokenize sentences
        sentences = sent_tokenize(text)
        total_words = len(word_tokenize(text))
       
        # Initialize structure similarity to a default value
        structure_similarity = 0.0
        
        if sentences:  # Check if sentences list is not empty
            # Calculate similarity based on sentence structure
            structure_similarity = len(sentences) / len(sentences)

        # Tokenize words and get part-of-speech tags
        words = [pos_tag(word_tokenize(sentence)) for sentence in sentences]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled = set([spell.correction(word) for sentence in words for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # Calculate punctuation similarity
        punctuation_sim = calculate_punctuation_similarity(text)

        # Calculate sentence length similarity
        length_similarity = calculate_sentence_length_similarity(sentences)

        # Detect active/passive voice
        active_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'active') / len(sentences)
        passive_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'passive') / len(sentences)

        # Grammar errors
        grammar_errors_count = len(misspelled)

        # Case usage (upper/lower case)
        upper_case_count = sum(1 for char in text if char.isupper())
        lower_case_count = sum(1 for char in text if char.islower())

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph = generate_ngram_transition_graph(text, n_value)

        phrase_patterns = extract_phrase_patterns(text)

        # Additional features: Function word counts or presence/absence
        function_words = get_function_words(text)

        # Debugging statements
        print("Number of sentences:", len(sentences))
        print("Total words:", total_words)
        print("Structure similarity:", structure_similarity)
        print("Misspelled words:", misspelled)
        print("Punctuation similarity:", punctuation_sim)
        print("Length similarity:", length_similarity)
        print("Active voice count:", active_voice_count)
        print("Passive voice count:", passive_voice_count)
        print("Grammar errors count:", grammar_errors_count)
        print("Upper case count:", upper_case_count)
        print("Lower case count:", lower_case_count)
        print("Number of function words:", len(function_words))
        print("Number of nodes in graph:", len(graph.nodes))
        print("Phrase patterns:", phrase_patterns)

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text])

        svd = TruncatedSVD(n_components=4)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # Debugging statements
        print("TF-IDF matrix shape:", tfidf_matrix.shape)
        print("LSA matrix shape:", lsa_matrix.shape)

        # Calculate TTR for each text
        ttr = calculate_ttr(text)

        # Collect features into a list
        features = [structure_similarity, length_similarity, punctuation_sim, 
                    active_voice_count, passive_voice_count, 
                    grammar_errors_count, upper_case_count, 
                    lower_case_count, len(set(function_words)), len(graph.nodes), ttr] + list(lsa_matrix.flatten())
        
        # Standardize the features
        scaler = StandardScaler()
        features_normalized = scaler.fit_transform([features])

        return features_normalized[0]
    
    except Exception as e:
        print(f"Error in extract_features: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None

# Function to load and preprocess data
def load_data(author_a_texts, not_author_a_texts):
    # Label author_a texts as 1 and not_author_a texts as 0
    X = author_a_texts + not_author_a_texts
    y = [1] * len(author_a_texts) + [0] * len(not_author_a_texts)

    return X, y

def main(author_a_texts, not_author_a_texts):
    # Load and preprocess data
    X, y = load_data(author_a_texts, not_author_a_texts)

    # Extract features from the texts
    X_features = [extract_features(text) for text in X]

    # Remove None values
    X_features = [x for x in X_features if x is not None]

    # Convert feature list to numpy array
    X_features = np.array(X_features)
    y = np.array(y)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=40)

    # Train a random forest classifier (new stuff)
    clf = RandomForestClassifier(n_estimators=100, random_state=40)
    clf.fit(X_train, y_train)

    # Predict on test set
    y_pred = clf.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    author_a_texts = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
    """I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
    """I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
    """Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
    """MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
    """fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
    """make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
    """well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
    """I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
    """Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
    """The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
    """Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
    """Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
    """Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
    """ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
    """recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
    not_author_a_texts = ["""						
        As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
    """						
        In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
    """,
    """						
        They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  

    """    ,    """						
    I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
    """ , """						
    I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 

    """ , """						
    So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
    """ , """						
    Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.

    """ , """						
    I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
    """ , """						
    One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
    """ , """						
    Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
    """ , """						
    So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 

    """ , """						
    Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 

    """ 
    ]  # Provide a list of texts not from author A
    main(author_a_texts, not_author_a_texts)

Number of sentences: 7
Total words: 127
Structure similarity: 1.0
Misspelled words: {'tile', 'fired', 'float', 'guess', 'mural', 'was', 'make', 'dig', 'know', 'went', 'Live', 'learn', 'thinking', 'moved', 'picked', 'fear', 'everything', 'kiln', 'blob', 'do', 'glaze', 'smearing', 'be', 'have', 'spots', 'underglaze', 'decided', 'week', 'worked'}
Punctuation similarity: 0.765625
Length similarity: 0.5338983050847458
Active voice count: 0.5714285714285714
Passive voice count: 0.42857142857142855
Grammar errors count: 29
Upper case count: 11
Lower case count: 415
Number of function words: 31
Number of nodes in graph: 78
Phrase patterns: [('actually', 'picked'), ('along', 'with'), ('be', 'thinking'), ('big', 'blob'), ('dig', 'deep'), ('do', "n't"), ('empty', '!'), ('full', 'instead'), ('just', 'do'), ('know', 'if'), ('make', 'another'), ("n't", 'know'), ('ok', 'except'), ('on', 'went'), ('picked', 'up'), ('several', 'spots'), ('should', 'be'), ('smearing', 'all'), ('then', 'fired'), ('too', 

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 6
Total words: 82
Structure similarity: 1.0
Misspelled words: {'mood', 'set', 'mural', 'pas', 'obsessed', 'was', 'shocked', 'had', 'day', 'lady', 'look', 'see', 'looked', 'check', 'returned', 'apologize', 'night', 'tiles', 'called', 'nightmares', 'surprise', 'warped'}
Punctuation similarity: 0.376929012345679
Length similarity: 0.6439393939393939
Active voice count: 0.16666666666666666
Passive voice count: 0.8333333333333334
Grammar errors count: 22
Upper case count: 7
Lower case count: 280
Number of function words: 20
Number of nodes in graph: 54
Phrase patterns: [('Their', 'check'), ('To', 'my'), ('a', 'bad'), ('and', 'obsessed'), ('apologize', 'because'), ('bad', 'mood'), ('had', 'nightmares'), ('in', 'a'), ('look', 'when'), ('me', 'back'), ('mural', 'that'), ('my', 'suprise'), ('night', 'and'), ('shocked', 'at'), ('so', 'upset'), ('tile', 'mural'), ('tiles', 'look'), ('very', 'shocked'), (',', 'she'), ('next', 'day'), ('Luckily', ','), ('all', 'night'), ('at', 

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 5
Total words: 67
Structure similarity: 1.0
Misspelled words: {'get', 'is', 'Yesterday', 'MUST', 'test', None, 'cure', 'see', 'PAY', 'TODAY', 'cone', 'response', 'deadline', 'SALES', 'ad', 'butt', 'glazes', 'be', 'heck', 'ELSE', 'TAXES', 'placed', 'week', 'Cost', 'paper', 'OR', 'Something'}
Punctuation similarity: 0.71875
Length similarity: 0.49803921568627446
Active voice count: 0.6
Passive voice count: 0.4
Grammar errors count: 27
Upper case count: 38
Lower case count: 200
Number of function words: 12
Number of nodes in graph: 53
Phrase patterns: [('!', 'Something'), ("'s", 'butt'), (',', 'and'), ('5', 'glazes'), ('ELSE', '!'), ('MUST', 'PAY'), ('OR', 'ELSE'), ('PAY', 'SALES'), ('SALES', 'TAXES'), ('Something', 'about'), ('TAXES', 'TODAY'), ('TODAY', 'OR'), ('This', 'week'), ('VERY', 'interesting'), ('Yesterday', 'placed'), ('ad', 'in'), ('be', 'VERY'), ('cone', '5'), ('deadline', 'is'), ('glazes', 'just'), ('heck', 'of'), ('local', 'paper'), ('of', 'it'), ('some

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 10
Total words: 156
Structure similarity: 1.0
Misspelled words: {'am', 'fired', 'orders', 'mural', 'plan', 'couple', 'was', 'is', 'customer', 'getting', 'test', 'telling', 'day', 'work', 'buying', 'note', 'nothing', 'regretting', 'kind', 'months', 'did', 'cone', 'wedding', 'planning', 'been', 'summer', 'opportunity', 'kiln', 'do', 'lost', 'be', 'have', 'camp', 'fail', 'tiles', 'times', 'one', 'fall', 'year', 'calls', 'phone'}
Punctuation similarity: 0.8055555555555556
Length similarity: 0.35530303030303034
Active voice count: 0.6
Passive voice count: 0.4
Grammar errors count: 41
Upper case count: 15
Lower case count: 485
Number of function words: 38
Number of nodes in graph: 98
Phrase patterns: [('!', "''"), ("''", 'On'), ("'s", 'kind'), ('(', 'work'), ('Not', 'that'), ('On', 'another'), ('``', 'If'), ('all', 'year'), ('am', 'regretting'), ('an', 'opportunity'), ('another', 'note'), ('been', 'better'), ('better', 'prepared'), ('cone', '10'), ('day', 'after'), ('did

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 1
Total words: 84
Structure similarity: 1.0
Misspelled words: {'stuff', 'ornament', 'make', 'hump', 'fix', 'vase', 'molds', None, 'babies', 'Smart', 'stars', 'schedule', 'footprints', 'containers', 'flyers', 'train', 'scout', 'certificates', 'footprint', 'Final', 'women', 'marketing', 'star', 'mix', 'summer', 'colored', 'tools', 'plaque', 'letters', 'glazes', 'maps', 'lie', 'shapes', 'baby', 'w', 'camp', 'dipping', 'tiles', 'gift', 'peacock', 'needed', 'garden', 'send', 'party', 'postcards'}
Punctuation similarity: 0.7118055555555556
Length similarity: 0.0
Active voice count: 1.0
Passive voice count: 0.0
Grammar errors count: 45
Upper case count: 3
Lower case count: 405
Number of function words: 4
Number of nodes in graph: 68
Phrase patterns: [('(', 'also'), (')', 'mix'), ('Final', 'marketing'), ('Liz', ')'), ('Smart', 'and'), ('also', 'to'), ('and', 'Final'), ('babyfootprint', 'gift'), ('camp', 'schedule'), ('colored', 'dipping'), ('containers', 'from'), ('daniel-

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 13
Total words: 178
Structure similarity: 1.0
Misspelled words: {'am', 'anticipated', 'batch', 'go', 'doing', 'teach', 'mural', 'class', 'business', 'mixing', 'warping', 'is', 'wants', 'Thought', 'getting', 'going', 'had', 'broken', 'are', 'love', 'plaster', 'handmade', 'versus', 'favorites', 'look', 'molds', 'see', 'tilting', 'crafting', 'fun', 'pour', 'break', 'learn', 'Cordillera', 'children', 'clay', 'layers', 'pouring', 'chance', 'fact', 'kiln', 'colored', 'do', 'carving', 'trepidation', 'cracked', 'be', 'discovered', 'have', 'painting', 'week', 'tiles', 'Had', 'worked', 'Got', 'teacher', 'relationship', 'robins', 'student', 'gave', 'Hope'}
Punctuation similarity: 0.6466346153846154
Length similarity: 0.36898061288305195
Active voice count: 0.6923076923076923
Passive voice count: 0.3076923076923077
Grammar errors count: 61
Upper case count: 18
Lower case count: 662
Number of function words: 34
Number of nodes in graph: 120
Phrase patterns: [('75', 'more'), (';

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 7
Total words: 94
Structure similarity: 1.0
Misspelled words: {'hundred', 'get', 'want', 'use', 'bottles', 'fill', 'ones', 'Debating', 'reason', 'money', 'do', 'glazes', 'school', 'colors', 'have', 'painting', 'block', 'choice', 'spend'}
Punctuation similarity: 0.703125
Length similarity: 0.5925925925925926
Active voice count: 1.0
Passive voice count: 0.0
Grammar errors count: 19
Upper case count: 11
Lower case count: 306
Number of function words: 18
Number of nodes in graph: 54
Phrase patterns: [('Debating', 'which'), ('For', 'some'), ('block', 'about'), ('but', 'not'), ('different', 'colors'), ('enough', 'of'), ('fill', '12'), ('hundred', 'different'), ('mental', 'block'), ('more', 'money'), ('nice', 'expensive'), ('no', 'choice'), ('not', 'enough'), ('school', 'painting'), ('some', 'reason'), ('total', 'mental'), ('yucky', 'cheap'), ('have', 'a'), ('do', "n't"), ('I', 'have'), ('12', 'bottles'), ('any', 'more'), ('any', 'one'), ('just', 'do'), ('money', 'on'), (

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 10
Total words: 195
Structure similarity: 1.0
Misspelled words: {'tile', 'organizer', 'managed', 'required', 'rear', 'time', 'is', 'want', 'had', 'doctor', 'painted', 'paint', 'cancelled', 'wanted', 'call', 'got', 'paw', 'line', 'accents', 'view', 'accent', 'fess', 'asked', 'bother', 'color', 'fear', 'beg', 'do', 'school', 'be', 'failure', 'done', 'course', 'appointment', 'tomorrow', 'center', 'become', 'finish', 'dilemma', 'tiles', 'one', 'kids', 'leave'}
Punctuation similarity: 0.7334722222222222
Length similarity: 0.4309523809523809
Active voice count: 0.6
Passive voice count: 0.4
Grammar errors count: 43
Upper case count: 19
Lower case count: 607
Number of function words: 45
Number of nodes in graph: 109
Phrase patterns: [("''", 'accents'), ('Not', 'perfect'), ('Of', 'course'), ('``', 'dog'), ('amazingly', 'inspired'), ('at', 'least'), ('become', 'amazingly'), ('bother', 'with'), ('call', 'from'), ('dilemma', 'of'), ('dog', 'paw'), ('even', 're-fired'), ('fear'

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 3
Total words: 61
Structure similarity: 1.0
Misspelled words: {'get', 'was', 'waited', 'is', 'going', 'i', 'felt', 'started', 'lady', 'give', 'asked', 'said', 'Yippee', 'asking', 'working', 'news', 'rate', 'year', 'eve'}
Punctuation similarity: 0.8409385813148789
Length similarity: 0.5217391304347826
Active voice count: 0.3333333333333333
Passive voice count: 0.6666666666666666
Grammar errors count: 19
Upper case count: 8
Lower case count: 167
Number of function words: 11
Number of nodes in graph: 47
Phrase patterns: [("'ve", 'waited'), ('..', 'Yippee'), ('10/tile', '..'), ('At', 'that'), ('The', 'great'), ('Yippee', '!'), ('asked', 'for'), ('get', 'anywhere'), ('give', 'me'), ('going', 'to'), ('great', 'news'), ('lady', 'felt'), ("n't", 'going'), ('news', 'is'), ('on', 'last'), ('said', 'they'), ('she', 'said'), ('started', 'working'), ('that', 'rate'), ('they', 'would'), ('to', 'get'), ('waited', 'a'), ('working', 'on'), ('would', 'give'), ('.', 'At'), ('6/tile',

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 8
Total words: 124
Structure similarity: 1.0
Misspelled words: {'tile', 'crap', 'get', 'go', 'difference', 'flatten', 'mixing', 'is', 'setting', 'are', 'retiring', 'bottles', 'Guess', 'know', 'part', 'has', 'Ugh', 'did', 'fridge', 'delayed', 'need', 'weekend', 'shipped', 'been', 'broke', 'filled', 'seems', 'glaze', 'school', 'glazes', 'be', 'way', 'have', 'painting', 'course', 'done', 'compared', 'tiles', 'year', 'Today', 'start', 'question', 'tried'}
Punctuation similarity: 0.796875
Length similarity: 0.5721544715447154
Active voice count: 0.75
Passive voice count: 0.25
Grammar errors count: 43
Upper case count: 15
Lower case count: 455
Number of function words: 21
Number of nodes in graph: 93
Phrase patterns: [('4', 'part'), ('6', "''"), ('?', 'Or'), ('Friday', 'shipped'), ('Must', 'be'), ('Or', 'should'), ('Re-painting', '2'), ('The', 'question'), ('Ugh', '!'), ('already', 'been'), ('are', 'setting'), ('asap', 'because'), ('be', 'done'), ('because', 'they'), ('b

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 11
Total words: 243
Structure similarity: 1.0
Misspelled words: {'glass', 'sifter', 'baking', 'gum', 'custom', 'was', 'sharpie', 'frit', 'intensity', 'works', 'paint', 'stick', 'inclusions', 'temperature', 'material', 'artist', 'be', 'have', 'sells', 'paper', 'frames', 'fire', 'excuse', 'bubbles', 'use', 'slumping', 'temp', 'keep', 'see', 'sons', 'sugar', 'attempt', 'cutting', 'clay', 'flaming', 'dishes', 'fused', 'research', 'breakage', 'makes', 'interesting', 'open', 'slump', 'using', 'is', 'had', 'painted', 'elders', 'learn', 'woman', 'life', 'kiln', 'do', 'enamels', 'talking', 'shards', 'Cracked', 'Today', 'stuff', 'firing', 'time', 'test', 'glue', 'molds', 'know', 'write', 'drop', 'bisque', 'Venezuela', 'does', 'Learned', 'turned', 'wait', 'fine', 'soda', 'slumped', 'applying'}
Punctuation similarity: 0.7750771604938271
Length similarity: 0.29623824451410663
Active voice count: 0.5454545454545454
Passive voice count: 0.45454545454545453
Grammar errors count: 7

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 43
Total words: 742
Structure similarity: 1.0
Misspelled words: {'wanting', 'find', 'set', 'breaking', 'tired', 'pennies', 'majored', 'was', 'creating', 'experimented', 'silent', 'confidence', 'demos', 'felt', 'day', 'realized', 'live', 'work', 'enter', 'contest', 'cone', 'did', 'come', 'asked', 'companies', 'muse', 'businesswoman', 'hobby', 'accept', 'been', 'opportunity', 'grail', 'saw', 'taking', 'basement', 'living', 'be', 'artist', 'have', 'avenues', 'working', 'Paper', 'ventures', 'office', 'purpose', 'energy', 'create', 'paper', 'season', 'grown', 'came', 'Was', 'samples', 'selling', 'period', 'became', 'get', 'doing', 'engulfed', 'agenda', 'absorbed', 'projects', 'paid', 'make', 'whole', 'passed', 'like', None, 'got', 'see', 'production', 'demonstrations', 'lease', 'beckoning', 'clay', 'years', 'sat', 'everything', 'house', 'degree', 'closed', 'jumped', 'renegotiated', 'way', 'supporting', 'given', 'par', 'ceramics', 'entries', 'support', 'Tools', 'people',

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 1
Total words: 86
Structure similarity: 1.0
Misspelled words: {'tile', 'faceplates', 'BOTTLES', 'backblocks', 'FROM', 'horse', 'PICKUP', 'GET', None, 'bank', 'GO', 'themes', 'fence', 'wall', 'erase', 'pour', 'adding', 'frame', 'whorehouse', 'family', 'do', 'gifts', 'tree', 'dance', 'plates', 'sgraffito', 'fused', 'C', 'additions', 'AND', 'handbill', "rand's", 'c'}
Punctuation similarity: 0.7966820987654322
Length similarity: 0.0
Active voice count: 1.0
Passive voice count: 0.0
Grammar errors count: 33
Upper case count: 46
Lower case count: 359
Number of function words: 14
Number of nodes in graph: 63
Phrase patterns: [("''", 'or'), ('+C', 'WHEREHOUSE'), ('200', 'BOTTLES'), ('BOTTLES', 'FROM'), ('C', '+C'), ('FROM', 'C'), ('GET', '200'), ('GO', 'GET'), ('``', 'wall'), ('a', '``'), ('addons', 'fused'), ('babyblocks', 'frame'), ('babybottle', 'bank'), ('dry', 'erase'), ('family', 'tree'), ('flower', 'ect-blank'), ('frame', 'family'), ('fused', 'in'), ('gifts', 'to'), 

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 6
Total words: 77
Structure similarity: 1.0
Punctuation similarity: 0.7256944444444444
Length similarity: 0.6229166666666667
Active voice count: 0.6666666666666666
Passive voice count: 0.3333333333333333
Grammar errors count: 24
Upper case count: 6
Lower case count: 294
Number of function words: 21
Number of nodes in graph: 60
TF-IDF matrix shape: (1, 33)
LSA matrix shape: (1, 1)


  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 41
Total words: 825
Structure similarity: 1.0
Misspelled words: {'arms', 'instalment', 'form', 'defaced', 'requirement', 'handbags', 'Bus', 'represent', 'flailing', 'was', 'enjoy', 'act', 'anything', 'stick', 'band', 'reasons', 'head', 'turning', 'raisin', 'favor', 'force', 'part', 'elbows', 'assume', 'buses', 'looking', 'enter', 'd', 'did', 'b', 'estate', 'cow', 'accept', 'varying', 'found', 'been', 'dialect', 'beg', 'songs', 'c', 'be', 'have', 'place', 'stops', 'chats', 'promised', 'looks', 'protracted', 'choice', 'rarer', 'driver', 'et', 'hello', 'sympathy', 'detailed', 'feel', 'imagine', 'bossing', 'doing', 'met', 'friends', 'use', 'best', 'drenched', 'state', 'status', 'gets', 'talisman', 'battlefield', 'volume', 'guardians', 'Wedged', 'thing', 'wonder', 'position', None, 'top', 'see', 'bus', 'humans', 'whilst', 'nomination', 'stretches', 'thrust', 'e', 'ignorance', 'foot', 'convey', 'years', 'elected', 'marvel', 'take', 'side', 'disgust', 'Ooh', 'none', 'asso

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 77
Total words: 1183
Structure similarity: 1.0
Misspelled words: {'were', 'humiliated', 'tested', 'sitting', 'worry', 'cries', 'watching', 'positioning', 'was', 'anything', 'cocoon', 'pin', 'crimes', 'felt', 'impact', 'day', 'user', 'speeding', 'brought', 'work', 'part', 'relaxing', 'takes', 'temperature', 'buses', 'hill', 'community', 'focus', 'months', 'hair', 'did', 'come', 'transport', 'having', 'shelter', 'grooves', 'demeanor', 'tongue', 'yin', 'need', 'eyes', 'hurrying', 'fates', 'been', 'fact', 'seat', 'formed', 'shopping', 'girl', 'be', 'discovered', 'have', 'stop', 'hate', 'doubtless', 'course', 'thanks', 'place', 'idyll', 'peace', 'handbag', 'ears', 'can', 'hour', 'souls', 'boarded', 'driver', 'tried', 'feel', 'period', 'momentarily', 'fare', 'destination', 'clever', 'get', 'doing', 'starting', 'individual', 'leaving', 'engulfed', 'embarrassing', 'let', 'drew', 'chat', 'bit', 'paid', 'swear', 'bond', 'say', 'something', 'hurtle', 'Invaders', 'gets', 'pass

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 32
Total words: 545
Structure similarity: 1.0
Misspelled words: {'were', 'outfit', 'German', 'Rhyming', 'holidaying', 'crass', 'pin', 'Time', 'surface', 'brilliance', 'work', 'dancing', 'call', 'singer', 'sound', 'refreshing', 'smörgåsbord', 'Opinion', 'Well', 'type', 'established', 'flashes', 'having', 'parts', 'cinemas', 'reason', 'spectacles', 'releasing', 'heard', 'extols', 'indie', 'instance', 'be', 'baroque', 'have', 'debut', 'lyrics', 'absurdity', 'bass', 'wear', 'snatches', 'driver', 'erectile', 'let', 'benefit', 'something', 'make', 'Indie', None, 'craftwork', 'influences', 'justice', 'artists', 'vervain', 'approach', 'stand', 'toffee', 'christened', 'instances', 'take', 'corner', 'way', 'Difficult', 'density', 'decide', 'Idiot', 'mean', 'makes', 'called', 'comparisons', 'Let', 'Pixies', 'sopranos', 'Music', 'undercurrent', 'mind', 'tracks', 'assassinating', 'is', 'debt', 'album', 'example', 'frank', 'are', 'painted', 'polished', 'Stroke', 'run', 'look', '

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 7
Total words: 121
Structure similarity: 1.0
Misspelled words: {'think', 'sets', 'go', 'today', 'is', 'missives', 'anything', 'going', 'make', 'missive', 'statements', 'hinges', 'write', 'ways', 'know', 'being', 'suppose', 'halt', 'agency', 'sentence', 'grind', 'continue', 'confusion', 'do', 'tracts', 'length', 'teetering', 'be', 'turn', 'have', 'ground', 'axis', 'times'}
Punctuation similarity: 0.6801388888888888
Length similarity: 0.3913894324853229
Active voice count: 0.42857142857142855
Passive voice count: 0.5714285714285714
Grammar errors count: 33
Upper case count: 9
Lower case count: 433
Number of function words: 21
Number of nodes in graph: 79
Phrase patterns: [('It', 'would'), ('Nearly', 'ground'), ('after', 'that'), ('agency', 'which'), ('an', 'entertaining'), ('and', 'statements'), ('being', 'able'), ('central', 'agency'), ('continue', 'for'), ('go', 'one'), ('hinges', 'now'), ('how', 'long'), ('into', 'an'), ('just', 'grind'), ('keep/delete', 'axis'), 

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 11
Total words: 156
Structure similarity: 1.0
Misspelled words: {'am', 'people', 'leaflets', 'think', 'things', 'get', 'today', 'atmosphere', 'lot', 'time', 'anything', 'say', 'franchise', 'organizing', 'spring', 'network', 'thing', 'organizers', 'loneliness', 'work', 'know', 'thought', 'Lonely', 'ending', 'friend', 'eradicating', 'meeting', 'push', 'response', 'did', 'suppose', 'someone', 'Spend', 'coordinators', 'turned', 'do', 'Club', 'be', 'none', 'edge', 'forming', 'seemed', 'send', 'Clubs', 'plantation'}
Punctuation similarity: 0.8246173469387755
Length similarity: 0.41243315508021394
Active voice count: 0.8181818181818182
Passive voice count: 0.18181818181818182
Grammar errors count: 45
Upper case count: 27
Lower case count: 573
Number of function words: 28
Number of nodes in graph: 101
Phrase patterns: [('-', 'an'), ('Co-ordinators', '-'), ('Not', 'that'), ('about', 'forming'), ('an', 'entire'), ('and', 'ending'), ('anything', 'more'), ('compassionate', 'th

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 4
Total words: 51
Structure similarity: 1.0
Misspelled words: {'posted', 'have', 'Digital', 'get', 'go', 'picture', 'need', 'know', None, 'Camera', 'pictures', 'got', 'figured', 'explore', 'post'}
Punctuation similarity: 0.6423611111111112
Length similarity: 0.4137323943661972
Active voice count: 1.0
Passive voice count: 0.0
Grammar errors count: 15
Upper case count: 30
Lower case count: 150
Number of function words: 8
Number of nodes in graph: 40
Phrase patterns: [('Digital', 'Camera'), ('a', 'picture'), ('all', 'figured'), ('figured', 'out'), ('get', 'pictures'), ('got', 'my'), ('here', 'now'), ('hopefully', 'the'), ('it', 'all'), ('just', 'need'), ('know', 'how'), ('my', 'Digital'), ('next', 'post'), ('on', 'here'), ('out', ','), ('pictures', 'posted'), ('post', 'will'), ('posted', 'up'), ('pretty', 'much'), ('the', 'next'), ('up', 'on'), ('.', 'WEEEEEEEEEEEEEEEEEEEEE'), ('Camera', 'and'), ('So', 'off'), ('and', 'hopefully'), ('explore', 'and'), ('have', 'a'), (

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 10
Total words: 314
Structure similarity: 1.0
Misspelled words: {'wanting', 'bunch', 'of', 'was', 'Knife', 'please', 'keeps', 'tonight', 'told', 'day', 'work', 'Yellow', 'Well', 'hair', 'been', 'songs', 'be', 'have', 'hour', 'Orgasmic', 'Least', 'feel', 'get', 'chirpy', 'bit', 'bonus', None, 'town', 'see', 'lack', 'nagana', 'warm', 'Did', 'everything', 'weekend', 'take', 'wont', 'done', 'huh', 'Digital', 'shower', 'ON', 'cousin', 'am', 'humid', 'mood', 'Boss', 'playing', 'today', 'Camera', 'is', 'getting', 'going', 'radio', 'are', 'brother', 'look', 'bed', 'Falls', 'drinks', 'DAY', 'money', 'lunch', 'TIME', 'road', 'do', 'school', 'talking', 'son', 'left', 'dunno', 'think', 'things', 'go', 'lot', 'want', 'time', 'dad', 'sleep', 'wake', 'alert', 'moved', 'does', 'timing', 'THIS', 'seemed', 'cause'}
Punctuation similarity: 0.7970693618171985
Length similarity: 0.25938461538461544
Active voice count: 0.4
Passive voice count: 0.6
Grammar errors count: 87
Upper case cou

  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 18
Total words: 429
Structure similarity: 1.0
Misspelled words: {'enough', 'needs', 'was', 'wants', 'try', 'candy', 'shared', 'told', 'wanted', 'sense', 'guys', 'looking', 'type', 'claimed', 'Blah', 'did', 'bother', 'found', 'been', 'decision', 'Matt', 'be', 'have', 'depended', 'relationship', 'came', 'fault', 'guess', 'towards', 'make', 'hand', 'half', 'thing', 'guy', 'trust', None, 'see', 'depend', 'years', 'seem', 'sex', 'blame', 'makes', 'called', 'forbid', 'male', 'am', 'lets', 'aka', 'Bitch', 'demand', 'world', 'feeling', 'is', 'had', 'movies', 'anyone', 'being', 'life', 'woman', 'do', 'men', 'bitch', 'talking', 'think', 'trying', 'liked', 'fling', 'leaning', 'time', 'want', 'my', 'know', 'has', 'thought', 'fantasy', 'friend', 'hear', 'hook', 'watch', 'said', 'happened', 'fucking', 'squeeze', 'fine', 'bye', 'friendship', 'end', 'date', 'meet'}
Punctuation similarity: 0.7549752861602498
Length similarity: 0.36823626600578274
Active voice count: 0.3888888888888

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 11
Total words: 78
Structure similarity: 1.0
Misspelled words: {'interest', 'am', 'people', 'waning', 'go', 'mind', 'couple', 'is', 'had', 'are', 'hand', 'day', 'thing', 'has', 'Is', 'arranged', 'turned', 'be', 'have', 'surprise', 'party'}
Punctuation similarity: 0.5822704081632653
Length similarity: 0.46868686868686865
Active voice count: 0.6363636363636364
Passive voice count: 0.36363636363636365
Grammar errors count: 21
Upper case count: 13
Lower case count: 227
Number of function words: 18
Number of nodes in graph: 57
Phrase patterns: [('!', 'Maybe'), ("'re", 'still'), ('2nd', 'anniversary'), ('?', 'It'), ('Is', 'my'), ('It', 'would'), ('You', "'re"), ('anniversary', 'party'), ('be', 'arranged'), ('day', '2'), ('go', 'with'), ('great', '!'), ('hand', ','), ('have', 'just'), ('is', 'day'), ('my', 'interest'), ("n't", 'surprise'), ('other', 'hand'), ('party', 'should'), ('should', 'be'), ('still', 'here'), ('surprise', 'me'), ('these', 'before'), ('thing', 'wanin

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var


Number of sentences: 6
Total words: 86
Structure similarity: 1.0
Misspelled words: {'am', 'think', 'people', 'death', 'tuna', 'alive', 'island', 'anniversary', 'is', 'cheeseburgers', 'like', 'look', 'know', 'weird', 'chilling', 'Is', 'theory', 'elves', 'happened', 'do', 'believe', 'living', 'ate', 'mean', 'piggy', 'Today'}
Punctuation similarity: 0.7371323529411764
Length similarity: 0.3333333333333333
Active voice count: 0.3333333333333333
Passive voice count: 0.6666666666666666
Grammar errors count: 26
Upper case count: 12
Lower case count: 271
Number of function words: 19
Number of nodes in graph: 58
Phrase patterns: [("'", 'death'), ("'s", 'really'), ('...', 'However'), ('Tupac', 'and'), ('a', 'theory'), ('am', 'one'), ('and', 'Biggy'), ('at', 'all'), ('ate', '...'), ('island', 'with'), ('just', 'chillin'), ('living', 'on'), ('look', 'at'), ('mean', 'look'), ('on', 'some'), ('people', 'who'), ('really', 'not'), ('remote', 'island'), ('some', 'remote'), ('still', 'alive'), ('those',

  self.explained_variance_ratio_ = exp_var / full_var


In [9]:
!pip install tensorflow



In [17]:
import string
import os
import traceback
from nltk import sent_tokenize, word_tokenize, pos_tag
from spellchecker import SpellChecker
import nltk
import networkx as nx
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from nltk.util import bigrams  
from scipy import spatial
from nltk.tree import Tree
from sklearn.utils import shuffle
from collections import Counter
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
import re
# Import TensorFlow/Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix

def extract_phrase_patterns(text):
    # Tokenize the text into words
    words = nltk.word_tokenize(text)
    
    # Create a bigram collocation finder
    bigram_finder = BigramCollocationFinder.from_words(words)
    
    # Apply a statistical measure to identify significant collocations
    scored_collocations = bigram_finder.score_ngrams(BigramAssocMeasures.chi_sq)
    
    # Extract collocations that meet a certain threshold
    significant_collocations = [bigram for bigram, score in scored_collocations if score >  3.0]
    
    return significant_collocations

def calculate_punctuation_similarity(text):
    # Define the set of punctuation marks
    punctuation_marks = set(string.punctuation)

    # Count punctuation marks in the text
    punctuation_counts = {punctuation_mark: text.count(punctuation_mark) for punctuation_mark in punctuation_marks}

    # Compute punctuation similarity as the sum of squared differences between punctuation frequencies
    total_marks = sum(punctuation_counts.values())
    punctuation_frequencies = {mark: count / total_marks for mark, count in punctuation_counts.items()}

    # Compute similarity using squared Euclidean distance between punctuation distributions
    punctuation_sim = 0.0
    for mark in punctuation_marks:
        punctuation_sim += (punctuation_frequencies.get(mark, 0.0) - 1.0 / len(punctuation_marks)) ** 2

    punctuation_sim = 1.0 - punctuation_sim  # Normalize to [0, 1]
    
    return punctuation_sim

def calculate_sentence_length_similarity(sentences):
    if len(sentences) == 0:
        return 0.0

    total_length = sum(len(sentence) for sentence in sentences)
    average_length = total_length / len(sentences)

    max_length = max(len(sentence) for sentence in sentences)
    min_length = min(len(sentence) for sentence in sentences)

    if max_length == min_length:
        return 0.0

    normalized_average_length = (average_length - min_length) / (max_length - min_length)

    return normalized_average_length

def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

def get_function_words(text):
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    words = word_tokenize(text.lower())
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0

    return len(intersection) / len(union)

def calculate_ttr(text):
    words = word_tokenize(text.lower())
    unique_words = set(words)
    num_tokens = len(words)

    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0

    return ttr

def detect_voice(sentence):
    """
    Detects the voice (active or passive) of a given sentence.

    Parameters:
        sentence (str): The input sentence.

    Returns:
        str: The detected voice ('active' or 'passive').
    """
    # Tokenize the sentence into words and get part-of-speech tags
    words = nltk.word_tokenize(sentence)
    tagged_words = nltk.pos_tag(words)

    # Check for passive voice indicators
    passive_indicators = ['is', 'am', 'are', 'was', 'were', 'been', 'being', 'be', 'by']

    # Check if the sentence contains any passive voice indicators
    if any(tagged_word[0].lower() in passive_indicators for tagged_word in tagged_words):
        return 'passive'
    else:
        return 'active'


from sklearn.preprocessing import StandardScaler

def extract_features(text):
    try:
        # Skip empty texts
        if not text.strip():
            return None

        # Tokenize sentences
        sentences = sent_tokenize(text)
        total_words = len(word_tokenize(text))
       
        # Initialize structure similarity to a default value
        structure_similarity = 0.0
        
        if sentences:  # Check if sentences list is not empty
            # Calculate similarity based on sentence structure
            structure_similarity = len(sentences) / len(sentences)

        # Tokenize words and get part-of-speech tags
        words = [pos_tag(word_tokenize(sentence)) for sentence in sentences]

        # Check spelling using pyspellchecker
        spell = SpellChecker()
        misspelled = set([spell.correction(word) for sentence in words for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        # Calculate punctuation similarity
        punctuation_sim = calculate_punctuation_similarity(text)

        # Calculate sentence length similarity
        length_similarity = calculate_sentence_length_similarity(sentences)

        # Detect active/passive voice
        active_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'active') / len(sentences)
        passive_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'passive') / len(sentences)

        # Grammar errors
        grammar_errors_count = len(misspelled)

        # Case usage (upper/lower case)
        upper_case_count = sum(1 for char in text if char.isupper())
        lower_case_count = sum(1 for char in text if char.islower())

        # Generate n-gram transition graphs
        n_value = 2  # You can adjust the n-gram size
        graph = generate_ngram_transition_graph(text, n_value)

        phrase_patterns = extract_phrase_patterns(text)

        # Additional features: Function word counts or presence/absence
        function_words = get_function_words(text)

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text])

        svd = TruncatedSVD(n_components=4)  # You can adjust the number of components
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        # Calculate TTR for each text
        ttr = calculate_ttr(text)

        # Collect features into a list
        features = [structure_similarity, length_similarity, punctuation_sim, 
                    active_voice_count, passive_voice_count, 
                    grammar_errors_count, upper_case_count, 
                    lower_case_count, len(set(function_words)), len(graph.nodes), ttr] + list(lsa_matrix.flatten())
        
        # Standardize the features
        scaler = StandardScaler()
        features_normalized = scaler.fit_transform([features])

        return features_normalized[0]
    
    except Exception as e:
        print(f"Error in extract_features: {e}")
        traceback.print_exc()  # Print the full traceback for detailed error information
        return None

# Function to load and preprocess data
def load_data(author_a_texts, not_author_a_texts):
    # Label author_a texts as 1 and not_author_a texts as 0
    X = author_a_texts + not_author_a_texts
    y = [1] * len(author_a_texts) + [0] * len(not_author_a_texts)

    return X, y

def main(author_a_texts, not_author_a_texts):
    # Load and preprocess data
    X, y = load_data(author_a_texts, not_author_a_texts)

    # Extract features from the texts
    X_features = [extract_features(text) for text in X]

    # Remove None values
    X_features = [x for x in X_features if x is not None]

    # Convert feature list to numpy array
    X_features = np.array(X_features)
    y = np.array(y)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=40)

    # Define the GRU model
    model = Sequential([
        GRU(64, return_sequences=True),
        GlobalAveragePooling1D(),
        Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    

    # Train the model
    history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

    # Print debug information
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)

    # Check model architecture
    print("Model summary:")
    print(model.summary())

    # Check for NaNs or infinite values
    print("NaNs in X_train:", np.isnan(X_train).any())
    print("Infinite values in X_train:", not np.isfinite(X_train).all())

    # Evaluate the model
    _, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy:", accuracy)

    # Predictions
    y_pred = (model.predict(X_test) > 0.5).astype("int32")

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

if __name__ == "__main__":
    author_a_texts = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
    """I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
    """I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
    """Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
    """MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
    """fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
    """make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
    """well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
    """I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
    """Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
    """The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
    """Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
    """Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
    """Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
    """ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
    """recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
    not_author_a_texts = ["""						
        As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
    """						
        In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
    """,
    """						
        They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  

    """    ,    """						
    I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
    """ , """						
    I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 

    """ , """						
    So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
    """ , """						
    Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.

    """ , """						
    I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
    """ , """						
    One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
    """ , """						
    Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
    """ , """						
    So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 

    """ , """						
    Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 

    """ , """						
    I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
    ]  # Provide a list of texts not from author A
    main(author_a_texts, not_author_a_texts)

  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ra

Epoch 1/10


  self.explained_variance_ratio_ = exp_var / full_var


ValueError: in user code:

    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_3' (type Sequential).
    
    Input 0 of layer "gru_3" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 12)
    
    Call arguments received by layer 'sequential_3' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 12), dtype=float32)
      • training=True
      • mask=None


In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, GlobalAveragePooling1D, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import nltk
from nltk.util import bigrams
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures
from nltk.stem import PorterStemmer
from spellchecker import SpellChecker
import string
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

# Function to extract significant phrase patterns from text
def extract_phrase_patterns(text):
    # Tokenize the text into words
    words = nltk.word_tokenize(text)
    
    # Create a bigram collocation finder
    bigram_finder = BigramCollocationFinder.from_words(words)
    
    # Apply a statistical measure to identify significant collocations
    scored_collocations = bigram_finder.score_ngrams(BigramAssocMeasures.chi_sq)
    
    # Extract collocations that meet a certain threshold
    significant_collocations = [bigram for bigram, score in scored_collocations if score > 3.0]
    
    return significant_collocations

# Function to calculate punctuation similarity
def calculate_punctuation_similarity(text):
    # Define the set of punctuation marks
    punctuation_marks = set(string.punctuation)

    # Count punctuation marks in the text
    punctuation_counts = {punctuation_mark: text.count(punctuation_mark) for punctuation_mark in punctuation_marks}

    # Compute punctuation similarity as the sum of squared differences between punctuation frequencies
    total_marks = sum(punctuation_counts.values())
    punctuation_frequencies = {mark: count / total_marks for mark, count in punctuation_counts.items()}

    # Compute similarity using squared Euclidean distance between punctuation distributions
    punctuation_sim = 0.0
    for mark in punctuation_marks:
        punctuation_sim += (punctuation_frequencies.get(mark, 0.0) - 1.0 / len(punctuation_marks)) ** 2

    punctuation_sim = 1.0 - punctuation_sim  # Normalize to [0, 1]
    
    return punctuation_sim

# Function to calculate sentence length similarity
def calculate_sentence_length_similarity(sentences):
    if len(sentences) == 0:
        return 0.0

    total_length = sum(len(sentence) for sentence in sentences)
    average_length = total_length / len(sentences)

    max_length = max(len(sentence) for sentence in sentences)
    min_length = min(len(sentence) for sentence in sentences)
    if max_length == min_length:
        return 0.0

    normalized_average_length = (average_length - min_length) / (max_length - min_length)

    return normalized_average_length

# Function to calculate part-of-speech tag similarity
def pos_tag_similarity(words1, words2):
    pos_tags1 = [tag for sentence in words1 for (word, tag) in sentence]
    pos_tags2 = [tag for sentence in words2 for (word, tag) in sentence]

    common_tags = set(pos_tags1).intersection(pos_tags2)
    total_tags = set(pos_tags1).union(pos_tags2)

    return len(common_tags) / len(total_tags)

# Function to extract function words from text
def get_function_words(text):
    function_words = set(["a", "an", "the", "I", "you", "he", "she", "it", "we", "they",
                          "in", "on", "under", "over", "between", "among",
                          "and", "but", "or", "if", "because",
                          "is", "am", "are", "was", "were", "be", "been", 
                          "have", "has", "had", "do", "does", "did"])

    words = word_tokenize(text.lower())
    function_words_in_text = [word for word in words if word in function_words]

    return function_words_in_text

# Function to generate n-gram transition graph
def generate_ngram_transition_graph(text, n):
    words = word_tokenize(text)
    ngrams = list(bigrams(words)) if n == 2 else list(nltk.ngrams(words, n))
    G = nx.DiGraph()

    for gram in ngrams:
        G.add_edge(gram[0], gram[1])

    return G

# Function to compute Jaccard similarity between graphs
def compute_jaccard_similarity(graph1, graph2):
    nodes_set1 = set(graph1.nodes)
    nodes_set2 = set(graph2.nodes)

    intersection = nodes_set1.intersection(nodes_set2)
    union = nodes_set1.union(nodes_set2)

    if len(union) == 0:
        return 0.0

    return len(intersection) / len(union)

# Function to calculate type-token ratio (TTR)
def calculate_ttr(text):
    words = word_tokenize(text.lower())
    unique_words = set(words)
    num_tokens = len(words)

    if num_tokens > 0:
        ttr = len(unique_words) / num_tokens
    else:
        ttr = 0.0

    return ttr

# Function to detect voice in a sentence (active or passive)
def detect_voice(sentence):
    words = nltk.word_tokenize(sentence)
    tagged_words = nltk.pos_tag(words)

    passive_indicators = ['is', 'am', 'are', 'was', 'were', 'been', 'being', 'be', 'by']

    if any(tagged_word[0].lower() in passive_indicators for tagged_word in tagged_words):
        return 'passive'
    else:
        return 'active'

# Function to extract features from text
def extract_features(text):
    try:
        if not text.strip():
            return None

        sentences = sent_tokenize(text)
        total_words = len(word_tokenize(text))
       
        structure_similarity = 0.0
        
        if sentences:
            structure_similarity = len(sentences) / len(sentences)

        words = [nltk.pos_tag(word_tokenize(sentence)) for sentence in sentences]

        spell = SpellChecker()
        misspelled = set([spell.correction(word) for sentence in words for (word, tag) in sentence if tag.startswith('N') or tag.startswith('V')])

        punctuation_sim = calculate_punctuation_similarity(text)

        length_similarity = calculate_sentence_length_similarity(sentences)

        active_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'active') / len(sentences)
        passive_voice_count = sum(1 for sentence in sentences if detect_voice(sentence) == 'passive') / len(sentences)

        grammar_errors_count = len(misspelled)

        upper_case_count = sum(1 for char in text if char.isupper())
        lower_case_count = sum(1 for char in text if char.islower())

        n_value = 2
        graph = generate_ngram_transition_graph(text, n_value)

        phrase_patterns = extract_phrase_patterns(text)

        function_words = get_function_words(text)

        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform([text])

        svd = TruncatedSVD(n_components=4)
        lsa_matrix = svd.fit_transform(tfidf_matrix)

        ttr = calculate_ttr(text)

        # Collect features into a list
        features = [structure_similarity, length_similarity, punctuation_sim, 
                    active_voice_count, passive_voice_count, 
                    grammar_errors_count, upper_case_count, 
                    lower_case_count, len(set(function_words)), len(graph.nodes), ttr] + list(lsa_matrix.flatten())
        
        # Standardize the features
        scaler = StandardScaler()
        features_normalized = scaler.fit_transform([features])

        # Reshape the features to match the expected input shape of the GRU model
        # Assuming sequence_length = 1 (one feature vector per text)
        features_reshaped = np.expand_dims(features_normalized, axis=1)

        return features_reshaped
    
    except Exception as e:
        print(f"Error in extract_features: {e}")
        return None

# Function to load and preprocess data
def load_data(author_a_texts, not_author_a_texts):
    X = author_a_texts + not_author_a_texts
    y = [1] * len(author_a_texts) + [0] * len(not_author_a_texts)

    return X, y

def main(author_a_texts, not_author_a_texts):
    X, y = load_data(author_a_texts, not_author_a_texts)

    X_features = [extract_features(text) for text in X]

    X_features = [x for x in X_features if x is not None]

    X_features = np.array(X_features)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=40)

    model = Sequential([
        GRU(64, return_sequences=True),
        GlobalAveragePooling1D(),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)

    print("Model summary:")
    print(model.summary())

    print("NaNs in X_train:", np.isnan(X_train).any())
    print("Infinite values in X_train:", not np.isfinite(X_train).all())

    _, accuracy = model.evaluate(X_test, y_test)
    print("Accuracy:", accuracy)

    y_pred = (model.predict(X_test) > 0.5).astype("int32")

    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

    
if __name__ == "__main__":
    author_a_texts = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
    """I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
    """I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
    """Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
    """MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
    """fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
    """make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
    """well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
    """I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
    """Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
    """The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
    """Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
    """Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
    """Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
    """ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
    """recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
    not_author_a_texts = ["""						
        As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
    """						
        In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
    """,
    """						
        They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  

    """    ,    """						
    I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
    """ , """						
    I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 

    """ , """						
    So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
    """ , """						
    Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.

    """ , """						
    I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
    """ , """						
    One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
    """ , """						
    Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
    """ , """						
    So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 

    """ , """						
    Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 

    """ , """						
    I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
    ]  # Provide a list of texts not from author A
    main(author_a_texts, not_author_a_texts)


  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ratio_ = exp_var / full_var
  self.explained_variance_ra

Epoch 1/10


ValueError: in user code:

    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\pc\anaconda3\Lib\site-packages\keras\src\engine\input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_4' (type Sequential).
    
    Input 0 of layer "gru_4" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 1, 1, 12)
    
    Call arguments received by layer 'sequential_4' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 1, 1, 12), dtype=float32)
      • training=True
      • mask=None


In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from sklearn.model_selection import train_test_split

# Sample data (replace with your dataset)
texts_positive = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
"""I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
"""I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
"""Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
"""MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
"""fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
"""make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
"""well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
"""I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
"""Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
"""The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
"""Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
"""Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
"""Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
"""ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
"""recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
texts_negative = ["""						
As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
"""						
In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
""",
"""						
They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  
"""    ,    """						
I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
""" , """						
I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 
""" , """						
So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
""" , """						
Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.
""" , """						
I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
""" , """						
One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
""" , """						
Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
""" , """						
So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 
""" , """						
Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 
""" , """						
I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
] 

# Concatenate positive and negative examples
texts = texts_positive + texts_negative
labels = [1] * len(texts_positive) + [0] * len(texts_negative)

# Tokenize the text
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to make them of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Convert to NumPy arrays
padded_sequences = np.array(padded_sequences)
labels = np.array(labels)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the model
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

# Function to predict if text belongs to the author
def predict_authorship(text):
    seq = tokenizer.texts_to_sequences([text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_sequence_length, padding='post')
    probability = model.predict(padded_seq)[0][0]
    return probability

# Test prediction
test_text = "A new text by Author A."
probability = predict_authorship(test_text)
print("Probability of belonging to the author:", probability)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5
Probability of belonging to the author: 0.53770155


In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Concatenate
from sklearn.model_selection import train_test_split
import nltk
from nltk import bigrams
from collections import Counter
import string
from nltk.corpus import stopwords
import networkx as nx

# Define functions for additional features

def phrase_patterns(text):
    tokens = text.split()
    bigram_counts = Counter(bigrams(tokens))
    significant_collocations = [bigram for bigram, count in bigram_counts.items() if count > 1] # Example threshold for significance
    return significant_collocations

def punctuation_similarity(text1, text2):
    punctuation_marks = set(string.punctuation)
    punctuation_count_text1 = sum(text1.count(char) for char in punctuation_marks)
    punctuation_count_text2 = sum(text2.count(char) for char in punctuation_marks)
    return min(punctuation_count_text1, punctuation_count_text2) / max(punctuation_count_text1, punctuation_count_text2)

def sentence_length_similarity(text1, text2):
    sentences_text1 = nltk.sent_tokenize(text1)
    sentences_text2 = nltk.sent_tokenize(text2)
    avg_length_text1 = sum(len(sent.split()) for sent in sentences_text1) / len(sentences_text1)
    avg_length_text2 = sum(len(sent.split()) for sent in sentences_text2) / len(sentences_text2)
    return min(avg_length_text1, avg_length_text2) / max(avg_length_text1, avg_length_text2)

def pos_tag_similarity(text1, text2):
    pos_tags_text1 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text1))]
    pos_tags_text2 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text2))]
    pos_tag_set1 = set(pos_tags_text1)
    pos_tag_set2 = set(pos_tags_text2)
    return len(pos_tag_set1.intersection(pos_tag_set2)) / len(pos_tag_set1.union(pos_tag_set2))

def function_words(text):
    stop_words = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(text)
    function_words_text = [word for word in tokens if word.lower() in stop_words]
    return function_words_text

def ngram_transition_graph(text, n=2):
    tokens = nltk.word_tokenize(text)
    ngrams = list(nltk.ngrams(tokens, n))
    transition_graph = nx.DiGraph()
    for i in range(len(ngrams) - 1):
        transition_graph.add_edge(ngrams[i], ngrams[i + 1])
    return transition_graph

def ngram_transition_graph_similarity(graph1, graph2):
    nodes_graph1 = set(graph1.nodes)
    nodes_graph2 = set(graph2.nodes)
    intersection = nodes_graph1.intersection(nodes_graph2)
    union = nodes_graph1.union(nodes_graph2)
    return len(intersection) / len(union)

def type_token_ratio(text):
    tokens = nltk.word_tokenize(text)
    unique_tokens = set(tokens)
    return len(unique_tokens) / len(tokens)

def voice_detection(sentence):
    # Example implementation using simple keyword matching
    if 'is' in sentence.split() or 'are' in sentence.split():
        return 'passive'
    else:
        return 'active'

# Your original code starts here

# Sample data (replace with your dataset)
texts_positive = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
"""I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
"""I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
"""Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
"""MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
"""fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
"""make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
"""well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
"""I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
"""Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
"""The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
"""Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
"""Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
"""Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
"""ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
"""recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
texts_negative = ["""						
As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
"""						
In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
""",
"""						
They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  
"""    ,    """						
I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
""" , """						
I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 
""" , """						
So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
""" , """						
Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.
""" , """						
I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
""" , """						
One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
""" , """						
Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
""" , """						
So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 
""" , """						
Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 
""" , """						
I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
] 

# Concatenate positive and negative examples
texts = texts_positive + texts_negative
labels = [1] * len(texts_positive) + [0] * len(texts_negative)

# Tokenize the text
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to make them of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Convert to NumPy arrays
padded_sequences = np.array(padded_sequences)
labels = np.array(labels)

# Extract additional features
phrase_patterns_features = [phrase_patterns(text) for text in texts]
punctuation_similarity_features = [punctuation_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
sentence_length_similarity_features = [sentence_length_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
pos_tag_similarity_features = [pos_tag_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
function_words_features = [function_words(text) for text in texts]
ngram_transition_graphs = [ngram_transition_graph(text) for text in texts]
ngram_transition_graph_similarity_features = [ngram_transition_graph_similarity(ngram_transition_graphs[i], ngram_transition_graphs[i+1]) for i in range(len(texts)-1)]
type_token_ratio_features = [type_token_ratio(text) for text in texts]
voice_detection_features = [voice_detection(text) for text in texts]

# Concatenate textual features
textual_features = np.concatenate((padded_sequences, np.array(phrase_patterns_features), np.array(punctuation_similarity_features)[:, None], np.array(sentence_length_similarity_features)[:, None], np.array(pos_tag_similarity_features)[:, None], np.array(function_words_features), np.array(ngram_transition_graph_similarity_features)[:, None], np.array(type_token_ratio_features)[:, None]), axis=1)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(textual_features, labels, test_size=0.2, random_state=42)

# Define the model
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Concatenate(),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

# Function to predict if text belongs to the author
def predict_authorship(text):
    seq = tokenizer.texts_to_sequences([text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_sequence_length, padding='post')
    # Extract additional features
    phrase_patterns_features = phrase_patterns(text)
    punctuation_similarity_features = punctuation_similarity(text, reference_text)
    sentence_length_similarity_features = sentence_length_similarity(text, reference_text)
    pos_tag_similarity_features = pos_tag_similarity(text, reference_text)
    function_words_features = function_words(text)
    ngram_transition_graph = ngram_transition_graph(text)
    type_token_ratio_features = type_token_ratio(text)
    voice_detection_features = voice_detection(text)
    # Concatenate textual features
    textual_features = np.concatenate((padded_seq, np.array(phrase_patterns_features), np.array(punctuation_similarity_features)[:, None], np.array(sentence_length_similarity_features)[:, None], np.array(pos_tag_similarity_features)[:, None], np.array(function_words_features), np.array(ngram_transition_graph_similarity_features)[:, None], np.array(type_token_ratio_features)[:, None]), axis=1)
    probability = model.predict(textual_features)[0][0]
    return probability

# Test prediction
test_text = "A new text by Author A."
probability = predict_authorship(test_text)
print("Probability of belonging to the author:", probability)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (29,) + inhomogeneous part.

In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences  # Add this import
from sklearn.model_selection import train_test_split
import nltk
from nltk import bigrams
from collections import Counter
import string
from nltk.corpus import stopwords
import networkx as nx

# Define functions for additional features

def phrase_patterns(text):
    tokens = text.split()
    if len(tokens) < 2:
        return []  # Return empty list for texts with less than two words
    bigram_counts = Counter(bigrams(tokens))
    significant_collocations = [bigram for bigram, count in bigram_counts.items() if count > 1]  # Example threshold for significance
    return significant_collocations

def punctuation_similarity(text1, text2):
    punctuation_marks = set(string.punctuation)
    punctuation_count_text1 = sum(text1.count(char) for char in punctuation_marks)
    punctuation_count_text2 = sum(text2.count(char) for char in punctuation_marks)
    return min(punctuation_count_text1, punctuation_count_text2) / max(punctuation_count_text1, punctuation_count_text2)

def sentence_length_similarity(text1, text2):
    sentences_text1 = nltk.sent_tokenize(text1)
    sentences_text2 = nltk.sent_tokenize(text2)
    avg_length_text1 = sum(len(sent.split()) for sent in sentences_text1) / len(sentences_text1)
    avg_length_text2 = sum(len(sent.split()) for sent in sentences_text2) / len(sentences_text2)
    return min(avg_length_text1, avg_length_text2) / max(avg_length_text1, avg_length_text2)

def pos_tag_similarity(text1, text2):
    pos_tags_text1 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text1))]
    pos_tags_text2 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text2))]
    pos_tag_set1 = set(pos_tags_text1)
    pos_tag_set2 = set(pos_tags_text2)
    return len(pos_tag_set1.intersection(pos_tag_set2)) / len(pos_tag_set1.union(pos_tag_set2))

def function_words(text):
    stop_words = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(text)
    function_words_text = [word for word in tokens if word.lower() in stop_words]
    return function_words_text

def ngram_transition_graph(text, n=2):
    tokens = nltk.word_tokenize(text)
    ngrams = list(nltk.ngrams(tokens, n))
    transition_graph = nx.DiGraph()
    transition_graph.add_nodes_from(ngrams)
    for i in range(len(ngrams) - 1):
        transition_graph.add_edge(ngrams[i], ngrams[i + 1])
    return transition_graph

def ngram_transition_graph_similarity(graph1, graph2):
    nodes_graph1 = set(graph1.nodes)
    nodes_graph2 = set(graph2.nodes)
    intersection = nodes_graph1.intersection(nodes_graph2)
    union = nodes_graph1.union(nodes_graph2)
    return len(intersection) / len(union)

def type_token_ratio(text):
    tokens = nltk.word_tokenize(text)
    unique_tokens = set(tokens)
    return len(unique_tokens) / len(tokens)

def voice_detection(sentence):
    # Example implementation using simple keyword matching
    if 'is' in sentence.split() or 'are' in sentence.split():
        return 'passive'
    else:
        return 'active'

# Your original code starts here

# Sample data (replace with your dataset)
texts_positive = ["""The 4 tile mural I worked a week on went into the kiln,along with everything else, and thankfully everything was ok except the mural.  The underglaze was too thick, and the glaze was too thick (I decided to float glaze the tile for fear of smearing all the black.)  The glaze actually picked up the black, moved it over, and then fired in a big blob in several spots.  Live and learn.  I just don't know if I have it in me to make another one.   I'll have to dig deep for this one.  I guess I should be thinking of the kiln as half full instead of half empty!""",
"""I'm so tired today because I was up all night worrying about the kiln firing.  It smelled something fierce, and I was worried we were all going to die of carbon monoxide poisoning in our sleep.  Plus, I kept hearing banging, which I hope wasn't anything exploding in the kiln, but I haven't found out yet because it's still 600 degrees the next day.""",
"""I happily called the lady about the tile mural that was just set to see how great it looked.  To my suprise, she was very shocked at how warped the tiles look when set.  I was so upset I had nightmares all night and obsessed about it all day. Luckily, she called me back the next day to apologize because she was in a bad mood.  P.s.  Their check was returned the next day.""",
"""Today I must get 100 bisque white tiles today.  My supplier of 7 years has my order of 4 weeks ago delayed in Mexico in customs.  Note to self:  Remember Murphy.  Never assume anything.""",
"""MUST PAY SALES TAXES TODAY OR ELSE!  Something about a deadline is a sure cure to get one off one's butt.  Yesterday placed a free ad in the local paper. Cost for free, and for free stuff-This should be VERY interesting to see the response. This week I should test some cone 5 glazes just for the heck of it.""",
"""fired the last mural.  now I'm depressed.  it's kind of like planning a wedding all year and then the day after you have nothing to do.  Not that I have nothing to do, just no one telling me what I have to do. (work orders) I could have been better prepared with a summer camp to fall into; I'm certainly getting phone calls.  But I was too sick to plan a couple of months ago."If you fail to plan, you plan to fail!"  On another note, I am regretting not buying the small test kiln.  Twice , and now three times I have lost a customer or an opportunity because I didn't have a smaller kiln to do test tiles in or cone 10, or whatever.  I will be getting one soon.""",
"""make hump molds make slab shapes:  babies, women,large star windchimes, small stars make multi-level vase  garden tiles or initial tiles peacock tray baby stuff for daniel-frame w letters, ornament, train plaque,send tiles for footprints(also to Liz) mix colored dipping glazes in quart containers from Smart and Final marketing tools needed: scout flyers party flyers new maps or general flyers summer camp schedule flyers baby footprint postcards fix website babyfootprint gift certificates""",
"""well, the Robinson mural worked out.  4 tiles cracked or broken, all re-painted.  Hope they like them.  The Cordillera mural is getting bigger everyday, now 75 more tiles than anticipated.  They look beautiful going into the kiln.  Double stilting them for less warping.  Had a chance to teach a self-portrait class for children;one of my favorites.  This week had my second student for handmade tiles who wants to go into business as such.  After a little trepidation, I gave into the fact that I am a teacher, and so I teach.  She however wants to learn clay crafting, versus painting, so this is fun.  Got to break open my plaster, and discovered I have a love-hate relationship with plaster carving. Thought it might be easier if I colored the plaster in three after mixing the batch and pour it in layers, so you could see what you are doing. I do love pouring molds.""",
"""I have a school painting on Monday.  For some reason I have a total mental block about the glazes.  Couldn't get the bottles I want,don't want to use the old ones.  Debating which glazes to use, the yucky cheap ones, or the nice expensive ones.  Usually, I have no choice.  I just don't want to spend any more money on half-used glazes. I have a hundred different colors, but not enough of any one to fill 12 bottles.""",
"""Well, I got a call from the mural organizer who asked if the tiles would be ready to view tomorrow.  Of course, I hadn't even re-fired the tiles yet, or done the two "dog paw" accents, or the tile that no one wanted to paint, or the stupid 4 tile center.  So I had to fess up, and beg for more time.  Of course, now that my rear is on the line, I managed to become amazingly inspired and finish the two accent tiles while my kids got ready for school.  I cancelled a doctor's appointment, and painted the stupid center.  Not perfect and beautiful like the last one, but it's there at least.  Now the dilemma of how lazy do I want to be?  Should I leave it black and white, or color it in, and if I color it in, how much color should I bother with?  Or is it passable as it is?  Mostly, I just fear total failure like the last time.""",
"""The great news is the mural I started working on last year, I was asking $6/tile. At that rate I wasn't going to get anywhere, so I asked for $8/tile. I've waited a year, and the lady felt so bad, she said they would give me $10/tile..Yippee!""",
"""Today I need to start mixing glazes for the last tile painting for the school year. The question is can I get 200 of the new nozzle bottles I tried out this weekend by Friday shipped and filled? Or should I go with what I have , which now seems like crap compared. They of course will never know the difference. Re-painting 2 tiles that broke, refiring two broken tiles, and refiring 6" tiles that the glaze didn't flatten out all the way. Must be done asap because they are setting this weekend, and has already been delayed once. Guess I better start re-painting those stupid 4 part mural tiles. Ugh!""",
"""Today I had a glass artist over for a firing.  It was a good excuse to do some research on fused glass.  My past attempt at painted,fused, and slumped glass turned out so-so.  I have some material already, so it would be nice to learn how to use it properly.  She is an older woman, and I had a nice time talking about glass with her; how she sells her stuff (in Venezuela), and just about life as an artist.  I custom programmed my kiln (she usually does it manually).  It was interesting to know that you can open a red hot flaming kiln with glass inside with no breakage.  I can't wait to see what is inside.  Learned about cutting glass, slumping in bisque, applying enamels to gum arabic through a sifter, using elmers glue to stick shards of glass together, using a metallic sharpie to write with on glass, and firing inclusions and dichroic glass.  Like to test frit on clay and glass.  baking soda makes bubbles between glass (use sparingly).Use of fiber paper vs. kiln wash.  slump at a higher temperature and fire paint at a lower temp to keep intensity of color.Use ceramic frames for drop molds (dishes)  Cracked bisque works fine as a glass saggar! """,
"""Attending NCECA in San Diego in 2003 was a turning point for me in many ways. Little did I know when several people asked if I was going, that it was more than I could have imagined. I went reluctantly, tired from work, but curious. When I got there I was lost, and wandered aimlessly, not knowing what I had walked into. I paid my $65, and set off to see what it was that everyone thought was so great. I wandered in and out of lectures and demonstrations. I was most interested in the business lectures, only really wanting to find a way to make a living doing what I love. I wandered through exhibits, spying the mug sale, the cone box contest, and the k-12 children's entries. Was my stuff up to par with the "real" teachers who had a degree? I vowed I would enter next year, just to be competitive.(I didn't , but that's another story). I was really excited to go to the basement area where everyone was selling everything. Tools I didn't know existed, schools beckoning (asking myself, how would my life had been different if I had majored in ceramics, and not married and had children), companies throwing samples my way by the caseloads. Paper, paper, and more paper. The next day was better, knowing that I was there to learn as much as possible in a short time period. I sat through lectures and demos. I absorbed conversations and watched people look and watch. When I got back home to my studio, I wasn't the same. When I left, I was a housewife that had more than a passing interest in a hobby. I was an entrepeneur, trying to find the holy grail that would catapault me from sometimes breaking even to supporting myself. When I came back, I felt like an artist. I realized I knew much more than I thought. I realized that the real world experience I had jumped into blindly had given me more opportunity than most people get in a lifetime of study. I saw my life 20 years from now, and 40 years from now, planning what I would like to do when the kids are grown and this season of my life had passed. I saw myself, 70 years old, touching the clay and asking the questions...... First , when I got back to work, I was engulfed by production and exploring new avenues of business. I taught with a new confidence, that yes, I knew what I was doing with what I did, and everything else would come later. I experimented more, and slowly the studio became a studio, not a storefront. I had an apprentice, and a muse. I would spend hours with the music on, in the silent of my space, pondering the next projects, or working with ferocity. I realized the sacrifices I had made as an artist, in my ventures as a businesswoman. I had no extra time or energy to "create" for the sake of creating, going into the unknown with no "agenda". I did not know what that felt like. I closed the studio. It felt like death. Where was my purpose without a store to support? I hated being just a mother. I almost couldn't do it, and didn't have to. I had renegotiated my lease for pennies. But I knew I had to cut off my arm for another one to grow literally. I moved the studio to my home, like a lot of potters do. I am lucky that I have patient people who live with me that accept the studio taking over the whole of the house. The driveway, the garage, the courtyard, the livingroom, the office, even in the bedroom. They know my sanity lies in it.""",
"""ceramic doorhangers with addons fused in themes: horse,flower ect-blank for dry erase ceramic lightswitch faceplates with addons fused in themes also pour lightswich plates then handbuild over them and around them gifts to do:scriffito doorhangers for stefani,emily,and natalie daniel and also ceramic babybottle bank for daniel and babyblocks frame family tree large tile with handbuilt additions and a "wall" or fence around it GO GET 200 BOTTLES FROM C +C WHEREHOUSE AND PICKUP AND RANDIS""",
"""recently tried a new dipping clear that unfortunately was discontinued due to lead leeching. Won't use it on dinnerware, but, oh my god, it is beautiful. Good thing I didn't return it to the factory like they wanted. Wonder if they'll still sell it with a different label warning. They should! I will write them because they took an uneccessary beating because of the mistake."""]  # Provide a list of texts from author A
texts_negative = ["""						
As promised, here's the next instalment of bus mongs.  I bet you've been looking forward to this, haven't you...   2. Bus Monitors  Now, in every walk of life, in every profession, in every place where humans exist there are heirachies.  I accept these heirachies with varying degrees of grace.  But, if there is one thing that makes me want to stick two fingers up to "The Man" and form a rock n' roll band, it's people who assume importance and status without any requirement for them to exist.  I have to be careful here to convey exactly what I mean.  I want you to understand.  Two elderly women on my bus service have elected themselves bus monitors.  As far as I know, there was never any formal nomination.  Let's be clear; these people have assumed the position of bus lords.  This basically involves:  a)  Sitting right behind the driver and shouting conversations at him in a "spirit of the blitz" style dialect.   Eg: "Ooh 'ello Frank, I 'ope you'll be putting yer foot down today, my Bert's expecting his dinner!".   Essentially, mindless, insiduous prattle.  The volume at which these conversations take place cow everyone around them into aural submission.  No-one can read, listening to music is impossible, and quiet chats with friends are verboten.  Essentially, this is an exercise in illustrating that they are friends with the driver, and so assume some of the importance they crave by association.  They rarely look around or even notice other bus people, the bus people they nominally claim to represent.  b) Getting on the bus first.  This is truly the raison d'etre of the bus monitor.  They force themselves, elbows and handbags flailing, onto the buses first for three reasons.  Firstly, this (again) gives them the air of importance and status that they crave.  Secondly, getting on the bus first gives them first choice of seats - they can then position themselves in prime bus real estate for loud driver conversations.  Thirdly, this allows them to have protracted chats with the driver, and fumble for their tickets whilst a large queue stretches back outside getting drenched in the rain.   c)  On the rare occasions where a new driver has been in place (I always feel great sympathy for these hapless footsoldiers, thrust naively onto the battlefield), bus monitors enter a state of heightened awareness.  Not content with shouting often unnecessary directions into the side of the driver's head, they will also offer information on who normally gets on at those stops, whether to wait for them if they aren't there and other classified, bus-monitor-priveleged information.  MI5 themselves would have dossiers less detailed on members of the Taliban.  d) On the rarer still occasions where the bus makes a wrong turning, the bus monitors become a flurry of activity.  "Wrong way!" they shout, whilst looking around incredulously at fellow passengers, as if the driver had defaced a war memorial.  e) Bus monitors are the guardians of bus protocol.  Although they can blatantly disregard other passengers, any kind of ignorance on the part of other passengers is met with disapproving looks.  Any breach of accepted protocol, whether or not you have ever been in this country before, been on a bus before, have the use of your arms and legs etc is met with their clear disgust.    Wedged into their seats with their old-woman paraphanelia, these are actually quite sad individuals.  I can only imagine the voids in their lives must have become slightly less yawning when they found solace in bossing people about on buses.  In two years of bus usage, I have yet to see them justify their self-appointed positions, and on top of it all, they clearly enjoy this.  They act like they are doing me a favour.  If getting on my nerves and stinking of Parma Violets is somehow helping me, I can only marvel at what my shortcomings must have been to start with.  Perhaps I was too relaxed and the bus didn't smell of Parma Violets enough.  We can but wonder.  This is just a small sample of the irritations that these people cause, and for once, I am not just saying that because I can't think of anything else.  It really is just a small sample.""",
"""						
In case any of you people care, I am one of the hardy souls of this world who commute to work.  Yes, I get the bus.  And I like it.  In fact, a 30-45 minute journey in the morning is an unbelievably relaxing way to get to work.  In a carefully temperature controlled cocoon, you can pop a bit of music on and watch the scenery, leaving all the actual "doing" to someone else.  Namely the driver.  It's difficult to convey the benefits of merely sitting, doing nothing, on an adequately comfortable seat, and not having to worry about anything for half an hour.   This is, in theory, superb.  However, my idyll in this metal tube with wheels is frequently tested by putrid invaders.  Invaders of the worst kind.  Space invaders, if you like.  I have attempted to categorise them in a new series, starting below.  All users of public transport will identify them.  And though they have many names, their presence is unmistakable.    1.  The feckless youngster.  Yesterday a regular user of my bus service, a feckless young girl, brought into sharp focus why I hate other bus people so much.  Perhaps I should avoid the term "bus people", as this either suggests a gypsy-like existence in an abandoned bus, or people who actually resemble buses.  Either way, it's not what I am trying to say.  Basically, I shall now define "bus people" as people who get the bus, in order to avoid confusion.  Anyway, her crimes against me are myriad and serious.  In an international court of bus law (ICBL) she would probably be tried and sentenced to death.   We have a distinct history.  It all started when, about 18 months ago, this individual started to wait at my stop.  Looking little different from the usual slack-jawed windowlickers of my home town, I paid little heed, instead assuming my favourite bus-waiting position of roughly perpendicular to the shelter in order to look up the hill, legs heroically akimbo like the Collossus of Rhodes.  I was somewhat surprised when she boarded the private vehicle which takes me to work.  Perhaps I had misjudged her, despite her appearance and demeanour.   A few weeks passed with respectful silence between us.  All was well, and I felt we had formed an invisible bond of ignoring eachother.  But then, a terrible thing happened. One day, she approached the bus stop, and I was unfortunate enough to momentarily lock eyes with her.  This, as most people would doubtless know, is a pre-cursor to some kind of conversation.  To my alarm, I had discovered that my mouth was open as well.  Snapping it shut, I did my best to rescue the situation.  I noticed that something was different about her... something was amiss.  My mind raced to pin it down.  Of course! Her hair.  She had dyed her hair.    "I like your hair" I said, before the full disastrous impact of what I had done hit me.   I had sparked up a conversation with a bus person!  No more louche days reading in the window seat, listening to the latest grooves.  No more beautiful days watching the speeding countryside.  I would be sucked in, engulfed in this desperate harlot's whirlygig of hair chat.  Maybe the whole situation would escalate to shopping, or worse, work.  Oh cruelest of all fates!!! Why?  Why did my tongue forsake me, when I most needed it to stop it's diabolical dance!  "Oh, thanks, I only di...."  By this time I had run onto the bus.  I couldn't risk more contact or possible friendship with this woman.  She would doubtless destroy what little peace I could wrestle from my day.  More would come of this, I was sure, and indeed it did.  An insidious campaign of irritation followed.  Once, the bus arrived ridiculously early, and we both missed it.  An uncomfortably long period of waiting ensued, before it was clear that no bus would be coming.  I was forced by the situation to offer a non-commital "I think we've missed it".  She rudely turned her back and stormed off, frantically jabbing at her mobile phone.  As we were both bound for the same destination, and we had both missed the same bus, a nice gesture would have been to offer a place in the lift she was undoubtedly arranging (although I would have turned her down on principle).  Instead she glared at me as if I had somehow Karmically arranged the absence of the bus in order to ruin her day.    This week alone, of the 5 days which are busable, she has neglected to have a ticket on 3 days.  This is not only gyppo behaviour, but is also an embarrassing social situation, which I seek to avoid at all times.  All 3 times, she has been "let off" the fare, which has only exponentially increased my contempt for her.  Then there's the running.  I get on the bus first, due to clever kerbside positioning.  She gets on immediately afterwards, and I swear she runs directly behind me, hurrying me along.  I feel obliged to hurl everything into the seat and dive out of her way.  Why she feels the need to hurtle up the bus is a mystery to all except me.  To me, it is but more evidence of her idiocy.    It's clear she thinks she is the J-Lo of the bus community.  Well she got her commupance today alright.  As the bus drew near, some schoolchildren passed us.  Their cries of "She's got a £2 handbag!" were delight to my ears as they systematically humiliated my self-important co-busee, who dresses like someone doing an impression of a character from Sex in the City down on their luck.  Other times the bus has pulled away, as she frantically runs behind it, and I have merely sat, smiling smugly.  Oh, good will have it's days.  But such are the cosmic forces of yin and yan that my victories are only part of a timeless struggle.  One which must be won at all costs. 
""",
"""						
They're Good, but Let's Not Start Any Wars Over Them   Well, in a new section of the page, I look at music and decide whether it's any good, for the benefit of you, the reader.  I will call it "My Opinion on Music".  Or "Reviews".  Yeah, that one.       Well, Franz Fedinand (or "The 'Nand" as I haven't christened them) are a Scottish indie type outfit.  That doesn't do them justice - "Indie" is used far too loosely nowadays to have any real meaning.  In this instance, let's take it to mean that they are progressive and slightly non-conformist. What's their sound like? I'll tell you.  They owe a big debt to Tom Verlaine and Television.  That kind of skewed funkiness cut through with some melodious guitar work and bass lines.  Then, in other instances, lead singer Alex Kapranos sounds like a more coquetteish Ian Curtis.  Either way, the mix spells funky and the music spells good. There's flashes of Iggy Pop's The Idiot in the density of some of the tracks, flashes of The Pixies in the pop-artful approach to lyrics.  Bizzarely, some parts of the album also recall Blondie at their Parallel-Lines zenith.  You work it out.  I can't be bothered. I've read and heard comparisons to "The 'Werk" (Kraftwerk).  This is pretty crass on the surface - there's snatches of German on some tracks, which is probably the main reason for the comparisons.  However, having said that, there is an undercurrent of a peculiarly teutonic baroque.  Difficult to pin down, but themes like darkened cinemas and dancing with men called Michael conjure a particularly Weimar atmosphere, in my mind at least. So we've established that their influences are a smorgasbord of left-field  artists.  But what is the driver that make The Nand stand out? Well there are moments of adreneline pumping brilliance.  The type that makes you want to go out and have a fight or run really fast, like all the best music does.  The opener "Jacqueline" is a multi-layered romp which displays a joy for words and sound which is refreshing.  It's slightly self-consciously skewed - it's not full-on absurdity, but has kind of taken a toffee hammer and tapped the norm hard enough to make it less normal.  Rhyming "spectacles" with "erecticles" is one such example.  The barnstorming chorus, which extols the virtues of holidaying is another.  A well rounded debut, all in all, but as a friend said to me after the Stroke's first album - "Where do they go from here?".  They might have just painted themselves into a corner by releasing something so polished so soon. Time will tell, but until that time tells, don't go assassinating any Archdukes.  
"""    ,    """						
I can't think of anything to write today, so this is going to go one of two ways.  Either I will turn this into an entertaining missive on not being able to write anything, or it will just grind to a halt, teetering precariously on the keep/delete axis.  Nearly ground to a halt after that sentence.  I suppose this hinges now on how long I have to continue for to make this a missive.  I don't know if there is a central agency which sets the length of missives, tracts and statements.  If not, there should be.  It would at least prevent confusion at times like this. 
""" , """						
I thought today about forming a Lonely Club.  Not that I'm lonely, but it seemed a compassionate thing to do.  To get lonely people together in a non-threatening atmosphere.  I could send out leaflets which say things like "Spend a lot of time on the Playstation?" or "Lonely?".  I think there would be a good response.  And then I could franchise it out, to other Lonely Co-ordinators - an entire network of Lonely Clubs could spring up, eradicating loneliness forever.  But then I thought, what if no-one turned up?  Could there be anything more tragic than someone organising a Lonely Club meeting and ending up totally alone.  That could push some Lonely Club organisers over the edge.  I suppose they could work with a friend, you know, so they didn't get Lonely. 
""" , """						
So I got my Digital Camera and I pretty much have it all figured out, I just need to know how to get pictures posted up on here now.  So off I go to explore and hopefully the next post will have a picture.  WEEEEEEEEEEEEEEEEEEEEE!!!
""" , """						
Did you ever wake up one day and everything just seemed to go totally right?    You actually want to get out of bed even though it is 4:00 a.m., your shower is awesome, your son is ready ON TIME for school, you look decent, the road to work is practically flawless and your favorite songs are all playing on the radio, your Mocaccino is Orgasmic and your Boss is in such a chirpy mood...    Well, today is that day for me and it just keeps on getting better.  My Boss told me that today was THE DAY for my bonus (Prefect timing because I have really been wanting that Digital Camera).  My cousin is in town and I rarely see her (she moved to Alberta, then Yellow Knife, now Niagara Falls) and a bunch of us are going to go for drinks tonight so its going to be picture time.  Its not sunny outside, but its warm (mostly humid but it's ok cause I left my hair curly today).  I am going to actually have time to take a full hour lunch and I will get to do so with my dad, brother and cousin.  And last but not least (or maybe Least but not last?) I am actually getting quite a bit of work done (well not right THIS second) so I wont feel guilty this weekend and think about all the things I have to do on Monday.  I dunno, maybe this is lack of sleep talking, but it really is a great day, it doesn't take much to please me huh? LOL  Ok, well back to work I go, have a good weekend.
""" , """						
I think I have had enough with men for at least the next 5 years.  Either I am super unlucky or I am a real Bitch (I am leaning more towards the earlier).  I can't seem to meet anyone half decent even if my life depended on it.  I am not talking about a serious, lets be monogamous type of relationship, I am talking about a simple friendship.  I have been talking to this guy for 4 years, Matt, aka Mr. Arkansas, we have shared every little secret (or at least I did) and every thought and fantasy and feeling and out of no where I am a bitch because I demand a little more after four fucking years.    My fuck friend on the other hand... He's in general not too bad... I just only see or hear from him when he wants some, god forbid Cindy has any needs.  There have been other guys over the last 2 years, one so called friend only called me or came by to smoke-up and watch movies when he was single, once he found himself a new fling, bye bye Cindy.  I called him on it the last time it happened, he said he would try and squeeze me in, I told him not to bother and guess what, that was the end of that.  This other guy, claimed he liked me and wanted to date me and so on, it was all BS.  I am ok with casual sex, I am only human and have needs too.  If that's all you want, just be up front about it and whatever decision I make at least it is my fault and I can't blame anyone but myself.  You would think that is pretty simple but no, not in this world.  I don't really know what I want and I am not out there trying to hook up with anyone, however I do know one thing, I want a friend (a male one, actually a woman would do just fine), I guess I am looking for a friend that I can be intimate with and also depend and trust.  I don't know if that makes sense, I'm so confused, I'm so tired of being alone.  Blah!!!
""" , """						
One of those killer days where nothing goes right for the boss, and you get blamed or the littlest thing happens and you get tons of shit.  I have a pounding head ache, I haven't had lunch or any break as a matter of fact.  All I want to do is go home, eat and take a long ass MOFO bubble bath.  Ciao!
""" , """						
Is my interest in this whole thing waning?  It wouldn't surprise me.  I had a go with a couple of these before.  They turned out rubbish. On the other hand, this is day 2.  You're still here.  So am I.  Both of I.  So this has turned out great!  Maybe a 2nd anniversary party should be arranged.  I have just the people in mind.
""" , """						
So I had a new patient yesterday, a man in his 90's, a sweet old man who is probably very lonely.  I always ask my patients how their weekend was or if anything special happened in their week and one thing led to another and I found out that his grandson never visits.  He lives in town and it is so sad that he never goes and visits.  The patient was telling me that he has never even met his great grand kids.  I just couldn't believe it.  I asked him if he had any other family in town and he mentioned a grand daughter.  When she came to pick him up, I pulled her aside and told her she needs to bring her kids to visit their grand father and great grand father.  She said that she would try and make an effort but she didn't really know what to say to her kids.  I was floored.  It's your family, you don't need to say anything special, just go and visit.  They left and I just felt so sad.  I really hope she does make an effort.  I asked my kids if they would ever not visit their grand parents and they said they would be upset if they couldn't visit them.  I hope I am raising them right, I would be so sad if they didn't visit me when I was old and couldn't do things on my own. 
""" , """						
Today is the anniversary of Elvis' death.  What do you think happened to Elvis?  Is he still alive?  I don't think he is, I mean look at all the cheeseburgers he ate... However, I am one of those people who like to believe that he is still alive, you know, just chillin, living on some remote island with Tupac and Biggy.  You know, that's really not that weird of a theory. 
""" , """						
I'm gonna go ahead and assume that a majority of the people who read this don't watch much t.v. or if you do, its most likely Discovery, History, National Geographic or some other channel that requires you to think a little bit (come on, if you watch the learning channel, you at least have to think a LITTLE).  I too, watch those channels, but every now and then, I like to shut off my brain and watch some mindless crap.  So last night, I watched one of my favorite movies (mainly cause it makes me laugh) "Sweet Home Alabama".  Love it.  If you have never seen it, shame on you!  You need to go and rent it right now, go ahead, I'll wait....   Ok, now that you have seen it, don't you just love it?  It's so cheesy and so predictable but you know what, I love those types of movies.  Another movie I love, "Two Weeks Notice", have you seen it?  Its another good one.  I have to say, Sandra Bullock and Hugh Grant make a great pair.  Oh, and lets not forget "Bridget Jones' Diary", how can you NOT love that movie?  You gotta love Bridget, she's awesome.  Hmm, what other movies do I like?  OH, "How to Lose a Guy in Ten Days", Love that one too.  Kate Hudson and Matthew McConaughey are awesome together.  Ok, I think that's enough for now, I could go on and on.  You should write to me and let me know what movies you like to watch, I am always on the look out for a good chick flick.  Ok, well, I'm out, have a great day!   """
] 


# Concatenate positive and negative examples
texts = texts_positive + texts_negative
labels = [1] * len(texts_positive) + [0] * len(texts_negative)

# Tokenize the text
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to make them of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')  # Padding sequences using pad_sequences

# Convert to NumPy arrays
padded_sequences = np.array(padded_sequences)
labels = np.array(labels)

# Convert function words to numerical indices
word_to_index = {}  # Create a dictionary to map words to indices
index = 1
for words_list in function_words_features:
    for word in words_list:
        if word not in word_to_index:
            word_to_index[word] = index
            index += 1

# Remove non-numeric elements from function_words_array
function_words_array_numeric = [[word for word in words if word.isdigit()] for words in function_words_array]

# Pad each list of numeric function words to the maximum length
padded_function_words_features = [words + ['0'] * (max_function_words_length - len(words)) for words in function_words_array_numeric]

# Convert padded function words features into a 2D NumPy array
function_words_array_padded = np.array(padded_function_words_features)

# Pad sequences to ensure consistent length
function_words_array_padded = pad_sequences(function_words_array_padded, maxlen=max_function_words_length, padding='post')

# Convert each list of function words to numerical indices
numerical_function_words = [[word_to_index[word] for word in words_list] for words_list in function_words_features]

# Pad the sequences to the maximum length
padded_function_words = pad_sequences(numerical_function_words, maxlen=max_function_words_length, padding='post')

# Convert to numpy array
function_words_array = np.array(padded_function_words)


# Extract additional features
phrase_patterns_features = [phrase_patterns(text) for text in texts]
punctuation_similarity_features = [punctuation_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
sentence_length_similarity_features = [sentence_length_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
pos_tag_similarity_features = [pos_tag_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
function_words_features = [function_words(text) for text in texts]
ngram_transition_graphs = [ngram_transition_graph(text) for text in texts]
ngram_transition_graph_similarity_features = [ngram_transition_graph_similarity(ngram_transition_graphs[i], ngram_transition_graphs[i+1]) for i in range(len(texts)-1)]
type_token_ratio_features = [type_token_ratio(text) for text in texts]
voice_detection_features = [voice_detection(text) for text in texts]

# Create a vocabulary of all unique bigrams
vocab = set()
for bigrams_list in phrase_patterns_features:
    for bigram in bigrams_list:
        vocab.add(bigram)

# Assign a unique index to each bigram
bigram_to_index = {bigram: i + 1 for i, bigram in enumerate(vocab)}

# Replace each bigram in phrase_patterns_features with its corresponding index
indexed_phrase_patterns_features = [[bigram_to_index[bigram] for bigram in bigrams_list] for bigrams_list in phrase_patterns_features]

# Compute the maximum length of the indexed phrase patterns
max_phrase_patterns_length = max(len(seq) for seq in indexed_phrase_patterns_features)

# Pad the sequences
padded_phrase_patterns_features = pad_sequences(indexed_phrase_patterns_features, maxlen=max_phrase_patterns_length, padding='post')

# Find the maximum length of function words
max_function_words_length = max(len(words) for words in function_words_features)

# Pad each list of function words to the maximum length
padded_function_words_features = [words + [''] * (max_function_words_length - len(words)) for words in function_words_features]

# Convert padded function words features into a 2D NumPy array
function_words_array = np.array(padded_function_words_features)

# Calculate max_features_length
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(function_words_array), len(type_token_ratio_features))

# Ensure all features have consistent shapes
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(function_words_array), len(type_token_ratio_features))

# Truncate or pad the other features to match max_features_length
punctuation_similarity_features = punctuation_similarity_features[:max_features_length]
sentence_length_similarity_features = sentence_length_similarity_features[:max_features_length]
pos_tag_similarity_features = pos_tag_similarity_features[:max_features_length]
ngram_transition_graph_similarity_features = ngram_transition_graph_similarity_features[:max_features_length]
type_token_ratio_features = type_token_ratio_features[:max_features_length]
function_words_array = pad_sequences(function_words_array, maxlen=max_function_words_length, padding='post')[:max_features_length]

# Convert all features to numpy arrays
padded_sequences = np.array(padded_sequences)
padded_phrase_patterns_features = np.array(padded_phrase_patterns_features)
punctuation_similarity_features = np.array(punctuation_similarity_features)
sentence_length_similarity_features = np.array(sentence_length_similarity_features)
pos_tag_similarity_features = np.array(pos_tag_similarity_features)
function_words_array = np.array(function_words_array)
ngram_transition_graph_similarity_features = np.array(ngram_transition_graph_similarity_features)
type_token_ratio_features = np.array(type_token_ratio_features)

# Concatenate textual features
textual_features = np.concatenate((padded_sequences, 
                                   padded_phrase_patterns_features, 
                                   punctuation_similarity_features, 
                                   sentence_length_similarity_features, 
                                   pos_tag_similarity_features, 
                                   function_words_array, 
                                   ngram_transition_graph_similarity_features, 
                                   type_token_ratio_features), 
                                  axis=1)


# Similar transformations for other features if needed
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(textual_features, labels, test_size=0.2, random_state=42)

# Define the model
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

# Function to predict if text belongs to the author
def predict_authorship(text):
    seq = tokenizer.texts_to_sequences([text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_sequence_length, padding='post')
    # Extract additional features
    phrase_patterns_features = phrase_patterns(text)
    punctuation_similarity_features = punctuation_similarity(text, reference_text)
    sentence_length_similarity_features = sentence_length_similarity(text, reference_text)
    pos_tag_similarity_features = pos_tag_similarity(text, reference_text)
    function_words_features = function_words(text)
    ngram_transition_graph = ngram_transition_graph(text)
    type_token_ratio_features = type_token_ratio(text)
    voice_detection_features = voice_detection(text)
    # Convert features to compatible shapes
    padded_phrase_patterns_features = pad_sequences([phrase_patterns_features], maxlen=max_phrase_patterns_length, padding='post')
    # Similar transformations for other features if needed
    # Concatenate textual features
    textual_features = np.concatenate((padded_seq, padded_phrase_patterns_features, np.array(punctuation_similarity_features)[:, None], np.array(sentence_length_similarity_features)[:, None], np.array(pos_tag_similarity_features)[:, None], np.array(function_words_features), np.array(ngram_transition_graph_similarity_features)[:, None], np.array(type_token_ratio_features)[:, None]), axis=1)
    probability = model.predict(textual_features)[0][0]
    return probability

# Test prediction
test_text = "A new text by Author A."
probability = predict_authorship(test_text)
print("Probability of belonging to the author:", probability)


ValueError: invalid literal for int() with base 10: 'The'

In [10]:
import string
import nltk
import numpy as np
import networkx as nx
from collections import Counter
from nltk import bigrams
from nltk.corpus import stopwords
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define functions for additional features

def phrase_patterns(text):
    tokens = text.split()
    if len(tokens) < 2:
        return []  # Return empty list for texts with less than two words
    bigram_counts = Counter(bigrams(tokens))
    significant_collocations = [bigram for bigram, count in bigram_counts.items() if count > 1]  # Example threshold for significance
    return significant_collocations

def punctuation_similarity(text1, text2):
    punctuation_marks = set(string.punctuation)
    punctuation_count_text1 = sum(text1.count(char) for char in punctuation_marks)
    punctuation_count_text2 = sum(text2.count(char) for char in punctuation_marks)
    return min(punctuation_count_text1, punctuation_count_text2) / max(punctuation_count_text1, punctuation_count_text2)

def sentence_length_similarity(text1, text2):
    sentences_text1 = nltk.sent_tokenize(text1)
    sentences_text2 = nltk.sent_tokenize(text2)
    avg_length_text1 = sum(len(sent.split()) for sent in sentences_text1) / len(sentences_text1)
    avg_length_text2 = sum(len(sent.split()) for sent in sentences_text2) / len(sentences_text2)
    return min(avg_length_text1, avg_length_text2) / max(avg_length_text1, avg_length_text2)

def pos_tag_similarity(text1, text2):
    pos_tags_text1 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text1))]
    pos_tags_text2 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text2))]
    pos_tag_set1 = set(pos_tags_text1)
    pos_tag_set2 = set(pos_tags_text2)
    return len(pos_tag_set1.intersection(pos_tag_set2)) / len(pos_tag_set1.union(pos_tag_set2))

def function_words(text):
    stop_words = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(text)
    function_words_text = [word for word in tokens if word.lower() in stop_words]
    return function_words_text

def ngram_transition_graph(text, n=2):
    tokens = nltk.word_tokenize(text)
    ngrams = list(nltk.ngrams(tokens, n))
    transition_graph = nx.DiGraph()
    transition_graph.add_nodes_from(ngrams)
    for i in range(len(ngrams) - 1):
        transition_graph.add_edge(ngrams[i], ngrams[i + 1])
    return transition_graph

def ngram_transition_graph_similarity(graph1, graph2):
    nodes_graph1 = set(graph1.nodes)
    nodes_graph2 = set(graph2.nodes)
    intersection = nodes_graph1.intersection(nodes_graph2)
    union = nodes_graph1.union(nodes_graph2)
    return len(intersection) / len(union)

def type_token_ratio(text):
    tokens = nltk.word_tokenize(text)
    unique_tokens = set(tokens)
    return len(unique_tokens) / len(tokens)

def voice_detection(sentence):
    # Example implementation using simple keyword matching
    if 'is' in sentence.split() or 'are' in sentence.split():
        return 'passive'
    else:
        return 'active'

# Concatenate positive and negative examples
texts = texts_positive + texts_negative
labels = [1] * len(texts_positive) + [0] * len(texts_negative)

# Tokenize the text
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to make them of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')  # Padding sequences using pad_sequences

# Convert to NumPy arrays
padded_sequences = np.array(padded_sequences)
labels = np.array(labels)

# Convert function words to numerical indices
word_to_index = {}  # Create a dictionary to map words to indices
index = 1
for words_list in function_words_features:
    for word in words_list:
        if word not in word_to_index:
            word_to_index[word] = index
            index += 1

# Remove non-numeric elements from function_words_array
function_words_array_numeric = [[word for word in words if word.isdigit()] for words in function_words_array]

# Pad each list of numeric function words to the maximum length
padded_function_words_features = [words + ['0'] * (max_function_words_length - len(words)) for words in function_words_array_numeric]

# Convert padded function words features into a 2D NumPy array
function_words_array_padded = np.array(padded_function_words_features)

# Pad sequences to ensure consistent length
function_words_array_padded = pad_sequences(function_words_array_padded, maxlen=max_function_words_length, padding='post')

# Truncate or pad the other features to match max_features_length
function_words_array_padded = function_words_array_padded[:max_features_length]

# Convert each list of function words to numerical indices
numerical_function_words = [[word_to_index[word] for word in words_list] for words_list in function_words_features]

# Pad the sequences to the maximum length
padded_function_words = pad_sequences(numerical_function_words, maxlen=max_function_words_length, padding='post')

# Convert to numpy array
function_words_array = np.array(padded_function_words)


# Extract additional features
phrase_patterns_features = [phrase_patterns(text) for text in texts]
punctuation_similarity_features = [punctuation_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
sentence_length_similarity_features = [sentence_length_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
pos_tag_similarity_features = [pos_tag_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
function_words_features = [function_words(text) for text in texts]
ngram_transition_graphs = [ngram_transition_graph(text) for text in texts]
ngram_transition_graph_similarity_features = [ngram_transition_graph_similarity(ngram_transition_graphs[i], ngram_transition_graphs[i+1]) for i in range(len(texts)-1)]
type_token_ratio_features = [type_token_ratio(text) for text in texts]
voice_detection_features = [voice_detection(text) for text in texts]

# Create a vocabulary of all unique bigrams
vocab = set()
for bigrams_list in phrase_patterns_features:
    for bigram in bigrams_list:
        vocab.add(bigram)

# Assign a unique index to each bigram
bigram_to_index = {bigram: i + 1 for i, bigram in enumerate(vocab)}

# Replace each bigram in phrase_patterns_features with its corresponding index
indexed_phrase_patterns_features = [[bigram_to_index[bigram] for bigram in bigrams_list] for bigrams_list in phrase_patterns_features]

# Compute the maximum length of the indexed phrase patterns
max_phrase_patterns_length = max(len(seq) for seq in indexed_phrase_patterns_features)

# Pad the sequences
padded_phrase_patterns_features = pad_sequences(indexed_phrase_patterns_features, maxlen=max_phrase_patterns_length, padding='post')

# Find the maximum length of function words
max_function_words_length = max(len(words) for words in function_words_features)

# Pad each list of function words to the maximum length
padded_function_words_features = [words + [''] * (max_function_words_length - len(words)) for words in function_words_features]

# Convert padded function words features into a 2D NumPy array
function_words_array = np.array(padded_function_words_features)

# Calculate max_features_length
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(function_words_array), len(type_token_ratio_features))

# Ensure all features have consistent shapes
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(function_words_array), len(type_token_ratio_features))

# Truncate or pad the other features to match max_features_length
punctuation_similarity_features = punctuation_similarity_features[:max_features_length]
sentence_length_similarity_features = sentence_length_similarity_features[:max_features_length]
pos_tag_similarity_features = pos_tag_similarity_features[:max_features_length]
ngram_transition_graph_similarity_features = ngram_transition_graph_similarity_features[:max_features_length]
type_token_ratio_features = type_token_ratio_features[:max_features_length]
function_words_array = pad_sequences(function_words_array, maxlen=max_function_words_length, padding='post')[:max_features_length]

# Convert all features to numpy arrays
padded_sequences = np.array(padded_sequences)
padded_phrase_patterns_features = np.array(padded_phrase_patterns_features)
punctuation_similarity_features = np.array(punctuation_similarity_features)
sentence_length_similarity_features = np.array(sentence_length_similarity_features)
pos_tag_similarity_features = np.array(pos_tag_similarity_features)
function_words_array = np.array(function_words_array)
ngram_transition_graph_similarity_features = np.array(ngram_transition_graph_similarity_features)
type_token_ratio_features = np.array(type_token_ratio_features)

# Concatenate textual features
textual_features = np.concatenate((padded_sequences, 
                                   padded_phrase_patterns_features, 
                                   punctuation_similarity_features, 
                                   sentence_length_similarity_features, 
                                   pos_tag_similarity_features, 
                                   function_words_array, 
                                   ngram_transition_graph_similarity_features, 
                                   type_token_ratio_features), 
                                  axis=1)


# Similar transformations for other features if needed
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(textual_features, labels, test_size=0.2, random_state=42)

# Define the model
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

# Function to predict if text belongs to the author
def predict_authorship(text):
    seq = tokenizer.texts_to_sequences([text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_sequence_length, padding='post')
    # Extract additional features
    phrase_patterns_features = phrase_patterns(text)
    punctuation_similarity_features = punctuation_similarity(text, reference_text)
    sentence_length_similarity_features = sentence_length_similarity(text, reference_text)
    pos_tag_similarity_features = pos_tag_similarity(text, reference_text)
    function_words_features = function_words(text)
    ngram_transition_graph = ngram_transition_graph(text)
    type_token_ratio_features = type_token_ratio(text)
    voice_detection_features = voice_detection(text)
    # Convert features to compatible shapes
    padded_phrase_patterns_features = pad_sequences([phrase_patterns_features], maxlen=max_phrase_patterns_length, padding='post')
    # Similar transformations for other features if needed
    # Concatenate textual features
    textual_features = np.concatenate((padded_seq, padded_phrase_patterns_features, np.array(punctuation_similarity_features)[:, None], np.array(sentence_length_similarity_features)[:, None], np.array(pos_tag_similarity_features)[:, None], np.array(function_words_features), np.array(ngram_transition_graph_similarity_features)[:, None], np.array(type_token_ratio_features)[:, None]), axis=1)
    probability = model.predict(textual_features)[0][0]
    return probability

# Test prediction
test_text = "A new text by Author A."
probability = predict_authorship(test_text)
print("Probability of belonging to the author:", probability)


ValueError: invalid literal for int() with base 10: 'The'

In [11]:
import string
import nltk
import numpy as np
import networkx as nx
from collections import Counter
from nltk import bigrams
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define functions for additional features

def phrase_patterns(text):
    tokens = text.split()
    if len(tokens) < 2:
        return []  # Return empty list for texts with less than two words
    bigram_counts = Counter(bigrams(tokens))
    significant_collocations = [bigram for bigram, count in bigram_counts.items() if count > 1]  # Example threshold for significance
    return significant_collocations

def punctuation_similarity(text1, text2):
    punctuation_marks = set(string.punctuation)
    punctuation_count_text1 = sum(text1.count(char) for char in punctuation_marks)
    punctuation_count_text2 = sum(text2.count(char) for char in punctuation_marks)
    return min(punctuation_count_text1, punctuation_count_text2) / max(punctuation_count_text1, punctuation_count_text2)

def sentence_length_similarity(text1, text2):
    sentences_text1 = nltk.sent_tokenize(text1)
    sentences_text2 = nltk.sent_tokenize(text2)
    avg_length_text1 = sum(len(sent.split()) for sent in sentences_text1) / len(sentences_text1)
    avg_length_text2 = sum(len(sent.split()) for sent in sentences_text2) / len(sentences_text2)
    return min(avg_length_text1, avg_length_text2) / max(avg_length_text1, avg_length_text2)

def pos_tag_similarity(text1, text2):
    pos_tags_text1 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text1))]
    pos_tags_text2 = [tag for word, tag in nltk.pos_tag(nltk.word_tokenize(text2))]
    pos_tag_set1 = set(pos_tags_text1)
    pos_tag_set2 = set(pos_tags_text2)
    return len(pos_tag_set1.intersection(pos_tag_set2)) / len(pos_tag_set1.union(pos_tag_set2))

def ngram_transition_graph(text, n=2):
    tokens = nltk.word_tokenize(text)
    ngrams = list(nltk.ngrams(tokens, n))
    transition_graph = nx.DiGraph()
    transition_graph.add_nodes_from(ngrams)
    for i in range(len(ngrams) - 1):
        transition_graph.add_edge(ngrams[i], ngrams[i + 1])
    return transition_graph

def ngram_transition_graph_similarity(graph1, graph2):
    nodes_graph1 = set(graph1.nodes)
    nodes_graph2 = set(graph2.nodes)
    intersection = nodes_graph1.intersection(nodes_graph2)
    union = nodes_graph1.union(nodes_graph2)
    return len(intersection) / len(union)

def type_token_ratio(text):
    tokens = nltk.word_tokenize(text)
    unique_tokens = set(tokens)
    return len(unique_tokens) / len(tokens)

def voice_detection(sentence):
    # Example implementation using simple keyword matching
    if 'is' in sentence.split() or 'are' in sentence.split():
        return 'passive'
    else:
        return 'active'

# Concatenate positive and negative examples
texts = texts_positive + texts_negative
labels = [1] * len(texts_positive) + [0] * len(texts_negative)

# Tokenize the text
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to make them of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')  # Padding sequences using pad_sequences

# Convert to NumPy arrays
padded_sequences = np.array(padded_sequences)
labels = np.array(labels)

# Extract additional features
phrase_patterns_features = [phrase_patterns(text) for text in texts]
punctuation_similarity_features = [punctuation_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
sentence_length_similarity_features = [sentence_length_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
pos_tag_similarity_features = [pos_tag_similarity(texts[i], texts[i+1]) for i in range(len(texts)-1)]
ngram_transition_graphs = [ngram_transition_graph(text) for text in texts]
ngram_transition_graph_similarity_features = [ngram_transition_graph_similarity(ngram_transition_graphs[i], ngram_transition_graphs[i+1]) for i in range(len(texts)-1)]
type_token_ratio_features = [type_token_ratio(text) for text in texts]
voice_detection_features = [voice_detection(text) for text in texts]

# Create a vocabulary of all unique bigrams
vocab = set()
for bigrams_list in phrase_patterns_features:
    for bigram in bigrams_list:
        vocab.add(bigram)

# Assign a unique index to each bigram
bigram_to_index = {bigram: i + 1 for i, bigram in enumerate(vocab)}

# Replace each bigram in phrase_patterns_features with its corresponding index
indexed_phrase_patterns_features = [[bigram_to_index[bigram] for bigram in bigrams_list] for bigrams_list in phrase_patterns_features]

# Compute the maximum length of the indexed phrase patterns
max_phrase_patterns_length = max(len(seq) for seq in indexed_phrase_patterns_features)

# Pad the sequences
padded_phrase_patterns_features = pad_sequences(indexed_phrase_patterns_features, maxlen=max_phrase_patterns_length, padding='post')

# Calculate max_features_length
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(type_token_ratio_features))

# Ensure all features have consistent shapes
max_features_length = max(len(padded_sequences), len(padded_phrase_patterns_features), len(type_token_ratio_features))

# Truncate or pad the other features to match max_features_length
punctuation_similarity_features = punctuation_similarity_features[:max_features_length]
sentence_length_similarity_features = sentence_length_similarity_features[:max_features_length]
pos_tag_similarity_features = pos_tag_similarity_features[:max_features_length]
ngram_transition_graph_similarity_features = ngram_transition_graph_similarity_features[:max_features_length]
type_token_ratio_features = type_token_ratio_features[:max_features_length]

# Convert all features to numpy arrays
padded_sequences = np.array(padded_sequences)
padded_phrase_patterns_features = np.array(padded_phrase_patterns_features)
punctuation_similarity_features = np.array(punctuation_similarity_features)
sentence_length_similarity_features = np.array(sentence_length_similarity_features)
pos_tag_similarity_features = np.array(pos_tag_similarity_features)
ngram_transition_graph_similarity_features = np.array(ngram_transition_graph_similarity_features)
type_token_ratio_features = np.array(type_token_ratio_features)

# Concatenate textual features
textual_features = np.concatenate((padded_sequences, 
                                   padded_phrase_patterns_features, 
                                   punctuation_similarity_features, 
                                   sentence_length_similarity_features, 
                                   pos_tag_similarity_features, 
                                   ngram_transition_graph_similarity_features, 
                                   type_token_ratio_features), 
                                  axis=1)

# Similar transformations for other features if needed
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(textual_features, labels, test_size=0.2, random_state=42)

# Define the model
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

# Function to predict if text belongs to the author
def predict_authorship(text):
    seq = tokenizer.texts_to_sequences([text])
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_sequence_length, padding='post')
    # Extract additional features
    phrase_patterns_features = phrase_patterns(text)
    punctuation_similarity_features = punctuation_similarity(text, reference_text)
    sentence_length_similarity_features = sentence_length_similarity(text, reference_text)
    pos_tag_similarity_features = pos_tag_similarity(text, reference_text)
    ngram_transition_graph = ngram_transition_graph(text)
    type_token_ratio_features = type_token_ratio(text)
    # Convert features to compatible shapes
    padded_phrase_patterns_features = pad_sequences([phrase_patterns_features], maxlen=max_phrase_patterns_length, padding='post')
    # Similar transformations for other features if needed
    # Concatenate textual features
    textual_features = np.concatenate((padded_seq, padded_phrase_patterns_features, np.array(punctuation_similarity_features)[:, None], np.array(sentence_length_similarity_features)[:, None], np.array(pos_tag_similarity_features)[:, None], np.array(ngram_transition_graph_similarity_features)[:, None], np.array(type_token_ratio_features)[:, None]), axis=1)
    probability = model.predict(textual_features)[0][0]
    return probability

# Test prediction
test_text = "A new text by Author A."
probability = predict_authorship(test_text)
print("Probability of belonging to the author:", probability)


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 2 has 1 dimension(s)

In [12]:
# Find the maximum length of function words
max_function_words_length = max(len(words) for words in function_words_features)

# Pad each list of function words to the maximum length
padded_function_words_features = [words + [''] * (max_function_words_length - len(words)) for words in function_words_features]

# Convert padded function words features into a 2D NumPy array
function_words_array = np.array(padded_function_words_features)

print("Shape of padded_sequences:", padded_sequences.shape)
print("Shape of padded_phrase_patterns_features:", padded_phrase_patterns_features.shape)
print("Shape of punctuation_similarity_features:", np.array(punctuation_similarity_features)[:, None].shape)
print("Shape of sentence_length_similarity_features:", np.array(sentence_length_similarity_features)[:, None].shape)
print("Shape of pos_tag_similarity_features:", np.array(pos_tag_similarity_features)[:, None].shape)
print("Shape of function_words_array:", function_words_array.shape)
print("Shape of ngram_transition_graph_similarity_features:", np.array(ngram_transition_graph_similarity_features)[:, None].shape)
print("Shape of type_token_ratio_features:", np.array(type_token_ratio_features)[:, None].shape)


Shape of padded_sequences: (29, 1013)
Shape of padded_phrase_patterns_features: (29, 44)
Shape of punctuation_similarity_features: (28, 1)
Shape of sentence_length_similarity_features: (28, 1)
Shape of pos_tag_similarity_features: (28, 1)
Shape of function_words_array: (29, 507)
Shape of ngram_transition_graph_similarity_features: (28, 1)
Shape of type_token_ratio_features: (29, 1)


In [10]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [2]:
!pip install keras



In [3]:
!pip install tensorflow

