In [1]:
from datasets import load_dataset
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download datasets
ds = load_dataset('json', data_files='coqar-questions.jsonl', split='train')

Found cached dataset json (/home/codespace/.cache/huggingface/datasets/json/default-f47651a98f70b344/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)


In [3]:
ds

Dataset({
    features: ['id', 'source', 'instruction', 'output'],
    num_rows: 2901
})

In [4]:
# Import tokenizer
from transformers import AutoTokenizer

# model: h2oai/h2ogpt-4096-llama2-13b
tokenizer = AutoTokenizer.from_pretrained('h2oai/h2ogpt-4096-llama2-13b')

In [5]:
# Tokenize the instruction and output of each row in dataset
# to get the length of each row
def tokenize(row):
    print(row)
    # add new column to dataset for lengths
    row['instruction_length'] = len(tokenizer(row['instruction'])['input_ids'])
    row['output_length'] = len(tokenizer(row['output'])['input_ids'])
    return row

ds = ds.map(tokenize)
ds

Map:   0%|          | 0/2901 [00:00<?, ? examples/s]

Map:   1%|          | 16/2901 [00:00<00:41, 68.90 examples/s]

{'id': '3zotghdk5ibi9cex97fepx7jetpso7', 'source': 'wikipedia', 'instruction': 'The Vatican Apostolic Library (), more commonly called the Vatican Library or simply the Vat, is the library of the Holy See, located in Vatican City. Formally established in 1475, although it is much older, it is one of the oldest libraries in the world and contains one of the most significant collections of historical texts. It has 75,000 codices from throughout history, as well as 1.1 million printed books, which include some 8,500 incunabula. \n\nThe Vatican Library is a research library for history, law, philosophy, science and theology. The Vatican Library is open to anyone who can document their qualifications and research needs. Photocopies for private study of pages from books published between 1801 and 1990 can be requested in person or by mail. \n\nIn March 2014, the Vatican Library began an initial four-year project of digitising its collection of manuscripts, to be made available online. \n\nTh

Map:   1%|          | 34/2901 [00:00<00:31, 91.81 examples/s]

{'id': '3wj1oxy92agboo5nlq4r7bndcb68a1', 'source': 'mctest', 'instruction': "Laura and Graham were having a party for their good friend Judy. Judy had graduated high school and they wanted to show her how proud they were of her, and Judy would be moving far away at the end of the year. Judy was going to college to become a doctor. She thought about becoming a lawyer or an engineer. She even thought about being a scientist. Judy would be bringing her friend Mike. There wouldn't be many people at the party, since this was a celebration with close friends. Laura set out drinks and snacks for Judy and the other guests. The snacks she set out were salty pretzels. QUESTIONS:\n", 'output': "Q1:\n1. Who was hosting the party?\n2. Who threw the party?\n3. Which two people were throwing a party?\nQ2:\n1. Who was Laura and Graham's party for?\n2. Who did Laura and Graham throw the party for?\n3. For whom did Laura and Graham host a party?\nQ3:\n1. What was Judy's party for?\n2. What was the occas

Map:   2%|▏         | 71/2901 [00:00<00:20, 139.10 examples/s]

{'id': '37qw5d2zrgmfokrh2qqisbhjznms83', 'source': 'gutenberg', 'instruction': 'CHAPTER IV. \n\nSignor Andrea D\'Arbino, searching vainly through the various rooms in the palace for Count Fabio d\'Ascoli, and trying as a last resource, the corridor leading to the ballroom and grand staircase, discovered his friend lying on the floor in a swoon, without any living creature near him. Determining to avoid alarming the guests, if possible, D\'Arbino first sought help in the antechamber. He found there the marquis\'s valet, assisting the Cavaliere Finello (who was just taking his departure) to put on his cloak. \n\nWhile Finello and his friend carried Fabio to an open window in the antechamber, the valet procured some iced water. This simple remedy, and the change of atmosphere, proved enough to restore the fainting man to his senses, but hardly--as it seemed to his friends--to his former self. They noticed a change to blankness and stillness in his face, and when he spoke, an indescribable

Map:   5%|▍         | 131/2901 [00:00<00:12, 222.09 examples/s]

{'id': '3a1pq49wvhh8nbtgsb549nn9b0kh1v', 'source': 'mctest', 'instruction': "This summer Frank and his friends went to Boy Scout camp for two weeks. At camp they had lots of fun activities like swimming, wood carving, and telling ghost stories. At camp there weren't any restaurants or grocery stores, so for food they would have to make their own meals. One thing they could do is make peanut butter sandwiches, which every scout was given at the start of camp. But pretty soon everyone was sick of peanut butter sandwiches and wanted to eat something else instead. Some boys went out into the forest and picked out berries and roots that were safe to eat. Some boys even took out the boat and went fishing. They came back with a big fish that they cleaned and cooked themselves. \n\nAt first, Frank's mom was very worried about letting Frank go to camp. She was worried that he could get lost in the woods and be eaten by a bear. She was worried that he might get into a fight with the other boys. 

Map:   7%|▋         | 203/2901 [00:01<00:09, 292.62 examples/s]

{'id': '3mh9dq757wcawcp3atx6zpg57qhgut', 'source': 'cnn', 'instruction': 'Louisville, Kentucky (CNN) -- I\'ll Have Another cut loose on the home stretch to run down Bodemeister and earn the first Kentucky Derby wins for his rider and trainer Saturday. \n\nI\'ll Have Another, with a finish of 2:01:83, earned nearly $1.5 million of the $2.2 million purse. \n\nThat\'s quite a payoff for a horse that was purchased last year for the modest sum of $35,000. \n\nJockey Mario Gutierrez, making his Derby debut, called I\'ll Have Another a steady competitor. \n\n"They didn\'t believe (I\'ll Have Another) could have made it this far," Gutierrez said. "But even if they wanted me to pick (any horse in the field), I would have stayed with him." \n\nThe winner had 15-1 odds; Bodemeister was at 4-1, according to the Derby website. Dullahan, with 12-1 odds, also made a late run and finished third. \n\nI\'ll Have Another defeated Bodemeister by more than one length at the 1Â¼-mile classic, attended by a 

Map:   9%|▉         | 268/2901 [00:01<00:08, 299.11 examples/s]

{'id': '33nf62tlxj26kiasole7qfznxsajkf', 'source': 'gutenberg', 'instruction': 'Chapter 12: In Mocenigo\'s Power. \n\nIt was fully an hour before Polani was recalled to the council chamber. He saw at once, by the flushed and angry faces of some of the council, that the debate had been a hot one. At this he was not surprised, for he knew that the friends and connections of Ruggiero Mocenigo would vehemently oppose the suggestion he had made. \n\nThe doge announced the decision. \n\n"The council thank you for your suggestion, Signor Polani, and have resolved, by a majority, to confer upon Messer Francisco Hammond the high honour of placing his name upon the list of the citizens of Venice, without requiring from him the oaths of allegiance to the state. As such an honour has never before been conferred, save upon personages of the highest rank, it will be a proof of the gratitude which Venice feels towards one who has done her such distinguished service. The decree to that effect will be 

Map:  12%|█▏        | 343/2901 [00:01<00:08, 300.70 examples/s]

{'id': '3os46crslfz8cypx36ypjk5zsytv6w', 'source': 'gutenberg', 'instruction': 'CHAPTER XXIV. JULIUS TAKES A HAND \n\nIN his suite at Claridge\'s, Kramenin reclined on a couch and dictated to his secretary in sibilant Russian. \n\nPresently the telephone at the secretary\'s elbow purred, and he took up the receiver, spoke for a minute or two, then turned to his employer. \n\n"Some one below is asking for you." \n\n"Who is it?" \n\n"He gives the name of Mr. Julius P. Hersheimmer." \n\n"Hersheimmer," repeated Kramenin thoughtfully. "I have heard that name before." \n\n"His father was one of the steel kings of America," explained the secretary, whose business it was to know everything. "This young man must be a millionaire several times over." \n\nThe other\'s eyes narrowed appreciatively. \n\n"You had better go down and see him, Ivan. Find out what he wants." \n\nThe secretary obeyed, closing the door noiselessly behind him. In a few minutes he returned. \n\n"He declines to state his bus

Map:  14%|█▍        | 413/2901 [00:01<00:07, 321.34 examples/s]

{'id': '34qn5it0tzrfnb75to7yi5b03gq08g', 'source': 'race', 'instruction': 'Jack is an 11-year-old boy. One day he was playing with a ball. The ball went into the street, and Jack ran for the ball. A car hit him. Jack\'s parents took him to the hospital. The doctors told them, "Jack\'s head is hurt. Maybe he will wake up very soon. Maybe he will never wake up." Every day Jack\'s parents went to see him and talked to him. But Jack never talked to them. He just slept. One day Jack\'s father said, "Wake up, Jack. Let\'s go home and play with Cody." Cody is Jack\'s dog. When Jack\'s father said "Cody", Jack moved his arm. Then Jack\'s parents had an idea. They told the nurse, "We want to bring Jack\'s dog to the hospital. Is it OK?" "A dog in the hospital?" the nurse said. "That\'s very unusual. But. yes, it\'s OK." The next day, Jack\'s parents brought Cody to the hospital. When they put the dog on Jack\'s bed, Jack opened his eyes. Jack\'s parents brought Cody to the hospital every day. C

Map:  16%|█▌        | 462/2901 [00:01<00:07, 321.70 examples/s]

{'id': '3u84xhcdicdb6vqtlfud7syhjas4z2', 'source': 'mctest', 'instruction': "There once was a lion who did not roar, but instead he said meow. The lion was sad, because he could not roar like his other lion friends. The lion went to talk to his family. He first went to talk to his brother, but his brother was not home. Then he went to talk to his dad, but his dad was not home either. Luckily, the lion's sister was home. He asked his sister why he thought he could not roar. His sister said they need to go talk to their friend the squirrel. The squirrel lived in a tree with a nice door mat outside. The squirrel said to the lion if he wanted to start to roar instead of meow, then he need to run faster than the other lion's. So the next day, the lion played a game, in which he ran faster than all the other lions. Now, the lion roars and doesn't meow. QUESTIONS:\n", 'output': "Q1:\n1. What noise do lions make?\n2. How do lions talk?\n3. What noise do lions use to communicate?\nQ2:\n1. What 

Map:  18%|█▊        | 536/2901 [00:02<00:07, 307.74 examples/s]

{'id': '3wetl7aqwt8shln0edie8jzg5gg53e', 'source': 'gutenberg', 'instruction': 'CHAPTER XLII. \n\nGEORGE II. A.D. 1725--1760. \n\nThe reign of George II. was a very warlike one. Indeed he was the last king of England who ever was personally in a battle; and, curiously enough, this battle--that of Fontenoy--was the last that a king of France also was present in. It was, however, not a very interesting battle; and it was not clear who really won it, nor are wars of this time very easy to understand. \n\nThe battle of Fontenoy was fought in the course of a great war to decide who would be emperor of Germany, in which France and England took different sides; and this made Charles Edward Stuart, the eldest son of James, think it was a good moment for trying once again to get back the crown of his forefathers. He was a fine-looking young man, with winning manners, and a great deal more spirit than his father: and when he landed in Scotland with a very few followers, one Highland gentleman af

Map:  21%|██        | 616/2901 [00:02<00:06, 352.17 examples/s]

{'id': '34x6j5flptysvl8n1qy4m1bwx3djqt', 'source': 'race', 'instruction': '"How did Norman know, Sister Emma?" \n\n"He overheard you and Aryan arguing. I suspect that he purposely overheard on you. Norman knew or concluded what profession Aryan practiced. He might well have followed Aryan on his explorations. Whether he did or not is beside the point. When Aryan came back yesterday afternoon, Norman certainly decided that he had made some find, for Aryan told Norman that he would be leaving for the capital to meet the detective the next morning. He probably followed Aryan to your room and overheard what passed between you. \n\n"Since you could not act against the law of man and God, he would serve a natural justice in his own way. He took the jar of poison hemlock from the chemistry shop and when Aryan asked for a drink, he supplied it. Norman did not know the precise quantity needed and so Aryan did not suffer the full effects until after the bell called the community into the dining 

Map:  24%|██▍       | 702/2901 [00:02<00:05, 388.44 examples/s]

{'id': '3azhrg4cu4ktme1zh7c2ro3po9430s', 'source': 'wikipedia', 'instruction': 'Morocco, officially known as the Kingdom of Morocco (, lit. "The Western Kingdom"; ), is a sovereign country located in the Maghreb region of North Africa. Geographically, Morocco is characterised by a rugged mountainous interior, large tracts of desert and a lengthy coastline along the Atlantic Ocean and Mediterranean Sea. \n\nMorocco has a population of over 33.8\xa0million and an area of . Its capital is Rabat, and the largest city is Casablanca. Other major cities include Marrakesh, Tangier, Salé, Fes, and Meknes. A historically prominent regional power, Morocco has a history of independence not shared by its neighbours. Since the foundation of the first Moroccan state by Idris I in 789, the country has been ruled by a series of independent dynasties, reaching its zenith under the Almoravid and Almohad dynasty, spanning parts of Iberia and Northwestern Africa. Marinid and Saadi dynasties continued the s

Map:  27%|██▋       | 787/2901 [00:02<00:05, 401.13 examples/s]

{'id': '3o7l7bfshep737ycahi4gj7i0diei1', 'source': 'race', 'instruction': 'Jack was a pumpkin farmer. He lived in a big house on the edge of a town and grew the largest Pumpkin farm around the town. Jack\'s pumpkins were famous , for they always had the perfect shape and never rotted ( ) until well after Halloween . Every year around Halloween , people came from all over the town to buy his pumpkins to make their jack-o-lanterns ( ) One year, Jack was thinking about his pumpkin harvest ( ) of the year when his good friend Pete came to visit him. I have some bad news , Jack ," Pete said. "What is it ? " Jack asked . a little worried. The town decided not to have Halloween this year! They said there was simply no reason for it and they didn\'t have time to celebrate it because many other holidays were coming up soon . Peter told him. He knew this was very bad news for jack. Jack became very sad and lowered ( ) his head . "What can I do now , Peter?" I have all these beautiful pumpkins an

Map:  30%|███       | 878/2901 [00:02<00:04, 423.78 examples/s]

{'id': '3eqhhy4hqsstbxzo9spyrdop9ka5g9', 'source': 'wikipedia', 'instruction': 'The President of the Russian Federation () is the elected head of state, Supreme Commander-in-Chief, and holder of the highest office in the Russian Federation. The current President of Russia is Vladimir Putin. \n\nIn 1991, the office was briefly known as the President of the Russian Soviet Federative Socialist Republic () until 25 December 1991. According to the 1978 Russian Constitution, the President of Russia was head of the executive branch and headed the Council of Ministers of Russia. According to the current 1993 Constitution of Russia, the President of Russia is not a part of the Government of Russia, which exercises executive power. \n\nIn all cases where the President of the Russian Federation is unable to fulfill his duties, they shall be temporarily delegated to the Prime Minister, who becomes Acting President of Russia. The Chairman of the Federation Council is the third important position af

Map:  32%|███▏      | 942/2901 [00:03<00:04, 420.50 examples/s]

{'id': '3cn4lgxd5xob15goptsutlpfekqy4g', 'source': 'wikipedia', 'instruction': 'A local area network (LAN) is a computer network that interconnects computers within a limited area such as a residence, school, laboratory, university campus or office building. By contrast, a wide area network (WAN) not only covers a larger geographic distance, but also generally involves leased telecommunication circuits or Internet links. An even greater contrast is the Internet, which is a system of globally connected business and personal computers. \n\nEthernet and Wi-Fi are the two most common technologies in use for local area networks. Historical technologies include ARCNET, Token ring, and AppleTalk. \n\nThe increasing demand and use of computers in universities and research labs in the late 1960s generated the need to provide high-speed interconnections between computer systems. A 1970 report from the Lawrence Radiation Laboratory detailing the growth of their "Octopus" network gave a good indic

Map:  34%|███▍      | 987/2901 [00:03<00:04, 423.84 examples/s]

{'id': '33sa9f9trxup42ihzymjhagqgyjeww', 'source': 'cnn', 'instruction': "(CNN)John Isner could not keep the United States in the Davis Cup on Sunday, so it was likely small consolation that his incredible tennis milestone remained unbeaten. \n\nWhile the American lost to Andy Murray in Scotland, putting Great Britain into the quarterfinals of the prestigious teams event, halfway across the world an absorbing battle was playing out between two of South America's most bitter rivals. \n\nIn the end it didn't come close to matching Isner's 11-hour marathon against Nicolas Mahut at Wimbledon in 2010, but the fourth rubber in the clash between Argentina and Brazil made its own piece of history. \n\nFor six hour and 43 minutes, Leonardo Mayer and Joao Souza contested the second-longest singles match in tennis history -- and third longest of any format after a seven-hour doubles clash between Switzerland and the Czech Republic in 2013. \n\nMayer eventually triumphed, after his 11th match poin

Map:  36%|███▌      | 1040/2901 [00:03<00:07, 255.10 examples/s]

{'id': '3ywrv122cszv3xjlrvli7cz7kce8uq', 'source': 'race', 'instruction': "Every time Lionel Messi breaks a record, it seems appropriate to compare him to the legends that came before him. \n\nIn 2012, he rewrote soccer history on multiple occasions, and his latest record came when he scored his 86th goal of the calendar year, breaking Gerd Muller's previous mark, before wrapping up the year with 91 goals. \n\nAll statistics indicate that Messi is currently the best player in the world, and that he will go down in history as the top footballer of his generation. But when he is judged against all-time greats like Pele and Diego Maradona, he still has work to do. \n\nWe don't know whether the Barcelona striker will have another year in which he records more than 90 goals, but he must still sustain a similar level of production for the next several seasons. \n\nMessi has already taken part in three of Barcelona's victorious Champions League campaigns, and he played an important role in tw

Map:  39%|███▉      | 1127/2901 [00:03<00:05, 314.63 examples/s]

{'id': '3vsolarpkb9bi8pch3vvkz4irc4398', 'source': 'gutenberg', 'instruction': "CHAPTER I. \n\nSIR LIONEL GOES TO HIS WOOING. \n\nYes, they were off. All the joys of that honeymoon shall be left to the imagination of the reader. Their first conversation, as it took place in the carriage which bore them from Mr. Bertram's door, has been given. Those which followed were probably more or less of the same nature. Sir Henry, no doubt, did strive to give some touch of romance to the occasion; but in no such attempt would his wife assist him. To every material proposition that he made, she gave a ready assent; in everything she acceded to his views; she would dine at two, or at eight, as he pleased; she was ready to stay two weeks, or only two days in Paris, as best suited him; she would adapt herself to pictures, or to architecture, or to theatres, or to society, or to going on and seeing nothing, exactly as he adapted himself. She never frowned, or looked black, or had headaches, or couldn'

Map:  42%|████▏     | 1223/2901 [00:04<00:04, 378.58 examples/s]

{'id': '3i0btbyzaxlu2hyn6s5shiz2x870yz', 'source': 'wikipedia', 'instruction': "The House of Bourbon is a European royal house of French origin, a branch of the Capetian dynasty (). Bourbonic kings first ruled France and Navarre in the 16th century. By the 18th century, members of the Spanish Bourbon dynasty held thrones in Spain, Naples, Sicily, and Parma. Spain and Luxembourg currently have monarchs of the House of Bourbon. \n\nThe royal Bourbons originated in 1272 when the heiress of the lordship of Bourbon married the youngest son of King Louis IX. The house continued for three centuries as a cadet branch, while more senior Capetians ruled France, until Henry IV became the first Bourbon king of France in 1589. Bourbon monarchs then united to France the small kingdom of Navarre, which Henry's father had acquired by marriage in 1555, ruling both until the 1792 overthrow of the monarchy during the French Revolution. Restored briefly in 1814 and definitively in 1815 after the fall of t

Map:  45%|████▌     | 1317/2901 [00:04<00:03, 419.45 examples/s]

{'id': '304sm51wa34yqipo52asjd7k7olbsp', 'source': 'cnn', 'instruction': '(CNN) -- World-record signing Cristiano Ronaldo scored on his debut as Real Madrid kicked off the Spanish football season with a shaky 3-2 victory at home to Deportivo La Coruna on Saturday night. \n\nCristiano Ronaldo celebrates after paying off a small chunk of his record transfer fee with a goal on his Real debut. \n\nReal\'s new generation of "Galacticos" were preceded onto the pitch by the world\'s fastest man Usain Bolt, with the Jamaican sprint star dribbling a ball to the delight of a packed Bernabeu crowd. \n\nCoach Manuel Pellegrini fielded seven new signings against a team who stunned Real 2-1 on the opening day last season to foreshadow a season of massive upheaval for the underachieving capital club. \n\nThe first goal took just 26 minutes in coming as $92 million signing Kaka fed the ball to $50 million capture Karim Benzema, whose shot rebounded off the post and fell welcomingly to Real captain Rau

Map:  48%|████▊     | 1388/2901 [00:04<00:03, 432.41 examples/s]

{'id': '3vzlgyjeyla24xe35qwi43vfdbpzxh', 'source': 'gutenberg', 'instruction': 'CHAPTER XII. \n\nThe Boys Talk It over \n\nAllen and Ike Watson were soon on the way back to the ranch. Fortunately Ike Watson knew every foot of the ground, and led by the most direct route. \n\nAs the reader knows, Paul and Chet heard them approaching and received their elder brother with open arms. \n\n"You look like a ghost!" declared Chet, starting back on catching sight of Allen\'s pale face. \n\n"And I feel like a shadow," responded Allen with a weary laugh. "But a good dinner and a nap will make me as bright as a dollar again." \n\n"He has our horses!" cried Paul. \n\n"Yes, but not my own," returned Allen. \n\nHe walked into the house and was here introduced to Noel Urner. The table was at once spread, and soon both Allen and Ike Watson were regaling themselves to their heart\'s content. \n\nDuring the progress of the meal Allen related all of his wonderful story of the fall from the bridge, the jou

Map:  51%|█████▏    | 1490/2901 [00:04<00:03, 420.05 examples/s]

{'id': '3yhh42uu5bfa2irondg2nax6ouyl0p', 'source': 'gutenberg', 'instruction': 'CHAPTER XVII. \n\nARRIVAL AT FORT CUMBERLAND--LETTERS OF WASHINGTON TO HIS FAMILY--PANIC OF DUNBAR--FORTUNES OF DR. HUGH MERCER--TRIUMPH OF THE FRENCH. \n\nThe obsequies of the unfortunate Braddock being finished, the escort continued its retreat with the sick and wounded. Washington, assisted by Dr. Craik, watched with assiduity over his comrades, Orme and Morris. As the horses which bore their litters were nearly knocked up, he despatched messengers to the commander of Fort Cumberland requesting that others might be sent on, and that comfortable quarters might be prepared for the reception of those officers. \n\nOn the 17th, the sad cavalcade reached the fort, and were relieved from the incessant apprehension of pursuit. Here, too, flying reports had preceded them, brought by fugitives from the battle; who, with the disposition usual in such cases to exaggerate, had represented the whole army as massacred

Map:  55%|█████▍    | 1587/2901 [00:04<00:02, 448.59 examples/s]

{'id': '3mhw492ww0da11apqm568g2lhpwvmb', 'source': 'mctest', 'instruction': 'When I was a little girl I lived in a place with no winter. It was always warm there. It never got cold. It never snowed. Sometimes it rained. Sometimes there was wind. But there was no snow on the trees or on the ground. There was no snow falling on our heads. There was no snow to make a snowman. \n\nMy teacher\'s name was Mrs. Scott. One day in December, Mrs. Scott told us that we were going to make a snowman. We were excited! We wanted to see a snowman. We knew how a snowman looked. We knew a snowman was made of three snowballs. A snowman had to have a carrot nose and black eyes and buttons. A snowman had a hat and a scarf. We knew all these things. \n\nMrs. Scott asked us to bring everything we needed to our classroom to make a snowman. Jimmy said, "I can bring the hat and the scarf." Anita said, "I can bring a carrot for the nose." Selma said, "I can cut out some black circles for the eyes and the buttons

Map:  59%|█████▊    | 1698/2901 [00:05<00:02, 445.21 examples/s]

{'id': '3z7ishfuh0vcpwdvxikqo4emm2i8zk', 'source': 'gutenberg', 'instruction': 'CHAPTER LVI. Ariadne \n\nMy Lord Castlewood had a house in Kensington Square spacious enough to accommodate the several members of his noble family, and convenient for their service at the palace hard by, when his Majesty dwelt there. Her ladyship had her evenings, and gave her card-parties here for such as would come; but Kensington was a long way from London a hundred years since, and George Selwyn said he for one was afraid to go, for fear of being robbed of a night,--whether by footpads with crape over their faces, or by ladies in rouge at the quadrille-table, we have no means of saying. About noon on the day after Harry had made his reappearance at White\'s, it chanced that all his virtuous kinsfolks partook of breakfast together, even Mr. Will being present, who was to go into waiting in the afternoon. \n\nThe ladies came first to their chocolate: them Mr. Will joined in his court suit; finally, my lo

Map:  62%|██████▏   | 1795/2901 [00:05<00:02, 462.27 examples/s]

{'id': '3m81gab8a0jmd2abdylnodsjorvbqc', 'source': 'cnn', 'instruction': '(CNN) -- Former U.S. Sen. Arlen Specter, who embodied a vanishing breed of liberal Republicanism before switching to the Democratic Party at the twilight of his political career, died Sunday after a long battle with cancer, his family announced. \n\nSpecter died of complications from non-Hodgkin\'s lymphoma at his home in Philadelphia, his family said. He was 82. \n\nThe veteran Pennsylvania politician had overcome numerous serious illnesses over the past two decades, including a brain tumor. He had been in the public eye since serving as a member of the Warren Commission, which investigated the assassination of President John F. Kennedy. \n\nSpecter was elected to the Senate in 1980 and represented Pennsylvania for 30 years, longer than anyone in the state\'s history. His politically moderate image fit hand-in-glove in the politically blue Northeast, both with its Democratic centrists and its liberal Republicans

Map:  65%|██████▌   | 1896/2901 [00:05<00:02, 482.64 examples/s]

{'id': '3bxqmrhwkzyaomlplwv1cu024qwumb', 'source': 'wikipedia', 'instruction': 'Wall Street is an eight-block-long street running roughly northwest to southeast from Broadway to South Street, at the East River, in the Financial District of Lower Manhattan in New York City. Over time, the term has become a metonym for the financial markets of the United States as a whole, the American financial services industry (even if financial firms are not physically located there), or New York-based financial interests. \n\nAnchored by Wall Street, New York City has been called both the most economically powerful city and the leading financial center of the world, and the city is home to the world\'s two largest stock exchanges by total market capitalization, the New York Stock Exchange and NASDAQ. Several other major exchanges have or had headquarters in the Wall Street area, including the New York Mercantile Exchange, the New York Board of Trade, and the former American Stock Exchange. \n\nThere

Map:  68%|██████▊   | 1970/2901 [00:05<00:01, 485.76 examples/s]

{'id': '3qfufysy9yf51eztk30640iz83af4y', 'source': 'race', 'instruction': 'Probably no other musical instrument is as popular as the guitar around the world. Musicians use the guitar for almost all kinds of music. Country and western music would not be the same without a guitar. The traditional Spanish folk music called Flamenco could not exist without a guitar. The sound of American blues music would not be the same without the sad cry of the guitar. And rock and roll music would almost be impossible without this instrument. \n\nMusic experts do not agree about where the guitar was first played. Most agree it is ancient. Some experts say an instrument very much like a guitar was played in Egypt more than 1,000 years ago. Most experts say that the ancestor of the modern guitar was brought to Spain from Persia sometime in the 12thcentury. The guitar continued to develop in Spain. In the 1700s it became similar to the instrument we know today. \n\nMany famous musicians played the instrum

Map:  70%|██████▉   | 2020/2901 [00:05<00:02, 427.74 examples/s]

{'id': '37trt2x24qr5rf6yi81ercgxb09jb8', 'source': 'wikipedia', 'instruction': 'When the board has no embedded components it is more correctly called a printed wiring board (PWB) or etched wiring board. However, the term printed wiring board has fallen into disuse. A PCB populated with electronic components is called a printed circuit assembly (PCA), printed circuit board assembly or PCB assembly (PCBA). The IPC preferred term for assembled boards is circuit card assembly (CCA), and for assembled backplanes it is backplane assemblies. The term PCB is used informally both for bare and assembled boards. \n\nInitially PCBs were designed manually by creating a photomask on a clear mylar sheet, usually at two or four times the true size. Starting from the schematic diagram the component pin pads were laid out on the mylar and then traces were routed to connect the pads. Rub-on dry transfers of common component footprints increased efficiency. Traces were made with self-adhesive tape. Pre-pr

Map:  72%|███████▏  | 2080/2901 [00:05<00:01, 416.28 examples/s]

{'id': '3c6fju71tqtai3a34zjc6pn9dxkuy5', 'source': 'gutenberg', 'instruction': 'CHAPTER XXXIV Silver Hair \n\nAnd how should I your true love know From another man?--Friar of Orders Gray \n\n"Please God, I can try again." \n\nThose were the words with which Herbert Bowater looked into his Rector\'s face on awaking in the evening of that same December day from one of a series of sleeps, each sweeter and longer than the last, and which had borne him over the dreaded hours, without fever, and with strengthening pulse. \n\nJulius had not ventured to leave the sick-room that whole day, and when at last he went home and sank into the chair opposite Terry, for the first time through all these weeks of trouble and tension, he burst into a flood of tears. \n\nHe had hardly made the startled lad understand that life, not death, had thus overcome him, when the door flew open, and in rushed Rosamond, crying, "Julius, Julius, come! It is he or his ghost!" \n\n"Who? What?" \n\n"It is your hair! At M

Map:  73%|███████▎  | 2125/2901 [00:06<00:02, 331.00 examples/s]

{'id': '304sm51wa34yqipo52asjd7k7pmsb9', 'source': 'cnn', 'instruction': 'Belfast, Northern Ireland (CNN) -- Ireland\'s top Roman Catholic cleric, Cardinal Sean Brady, was under mounting pressure to resign Friday amid renewed allegations about his role in dealing with the sexual abuse of children by priests. \n\nA British television documentary repeated claims made in 2010 that Brady was told of attacks by pedophile priest Father Brendan Smyth in 1975 but did not inform police or the parents of the victims. \n\nThe documentary also claimed that Brady, then a priest, had a greater role in the church investigation of the Smyth allegations than he has admitted. New details and documents also were produced. \n\nResponding to the BBC program, Brady repeated his defense that he had done his job by passing details of all allegations to his superiors. \n\nHe told CNN that he felt "betrayed" when he discovered that church officials had taken no action against Smyth, who continued to abuse child

Map:  76%|███████▌  | 2205/2901 [00:06<00:02, 296.64 examples/s]

{'id': '39gxdjn2otevgc8lwlvn3y1qyhzv8a', 'source': 'wikipedia', 'instruction': 'The German equivalent was used with the founding of the North German Confederation whose constitution granted legislative power over the protection of intellectual property (Schutz des geistigen Eigentums) to the confederation. When the administrative secretariats established by the Paris Convention (1883) and the Berne Convention (1886) merged in 1893, they located in Berne, and also adopted the term intellectual property in their new combined title, the United International Bureaux for the Protection of Intellectual Property. \n\nThe term can be found used in an October 1845 Massachusetts Circuit Court ruling in the patent case Davoll et al. v. Brown., in which Justice Charles L. Woodbury wrote that "only in this way can we protect intellectual property, the labors of the mind, productions and interests are as much a man\'s own...as the wheat he cultivates, or the flocks he rears." The statement that "dis

Map:  80%|███████▉  | 2310/2901 [00:06<00:01, 351.12 examples/s]

{'id': '3t3iwe1xg6nm9o4sdkc8o7y5uy1tqq', 'source': 'race', 'instruction': 'Tom and Fred are talking about the year 2020. "What will our world be like in the year 2020?" asks Tom. "I don\'t know," says Fred. "What do you think?" asks Tom. "Well, no one knows, but it\'s interesting to guess," says Fred. "In the year 2020 everyone will carry a pocket computer. The computer will give people the answer to all their problem. We will have telephones in our pockets, too, and we will be able to talk to our friends," says Tom. "A lot of people will live and work under the sea. Machines will do most of the work, and so people will have more holidays. They will be able to fly to the moon by spaceship and spend their holidays there," says Fred. "And I hope I will be able to live under the sea," says Tom. "Won\'t that be very interesting? Just like a fish." QUESTIONS:\n', 'output': 'Q1:\n1. What are the name of the people that are talking?\n2. What are the people that are talking called?\n3. \nQ2:\n

Map:  82%|████████▏ | 2371/2901 [00:06<00:01, 366.61 examples/s]

{'id': '3vnl7uk1xfjpizejz41ec8urnxbtfh', 'source': 'cnn', 'instruction': 'Jerusalem (CNN) -- The Indian nanny who saved the life of an Israeli boy during the Mumbai terror attacks in 2008 has been granted honorary citizenship and temporary residency in Israel. \n\nAt a ceremony Monday, the Israeli interior ministry in Jerusalem handed Sandra Samuel her identity card. \n\n"I hope I will honor the citizenship and love Israel. I would give my heart and soul for Israel," she said. \n\nSamuel has been caring for the boy, Moshe Holtzberg, since his parents died in the terror attacks on a Jewish cultural center, Chabad House, and several luxury hotels in India\'s financial capital. \n\nThey were among six people who were killed at Chabad House. Altogether, more than 160 people died in the attacks. \n\nDuring the raids, 10 men also attacked buildings including the luxury Taj Mahal Palace and Tower and Oberoi-Trident hotels and the city\'s Chhatrapati Shivaji train station. \n\nThe only survivi

Map:  85%|████████▌ | 2469/2901 [00:07<00:01, 416.38 examples/s]

{'id': '32at8r96gl9dmhyu5trno3z8we1su6', 'source': 'wikipedia', 'instruction': 'The Arctic ( or ) is a polar region located at the northernmost part of Earth. The Arctic consists of the Arctic Ocean, adjacent seas, and parts of Alaska (United States), Northern Canada, Finland, Greenland (Denmark), Iceland, Norway, Russia and Sweden. Land within the Arctic region has seasonally varying snow and ice cover, with predominantly treeless permafrost-containing tundra. Arctic seas contain seasonal sea ice in many places. \n\nThe Arctic region is a unique area among Earth\'s ecosystems. For example, the cultures in the region and the Arctic indigenous peoples have adapted to its cold and extreme conditions. In recent years, Arctic sea ice decline has been caused by global warming. Life in the Arctic includes organisms living in the ice, zooplankton and phytoplankton, fish and marine mammals, birds, land animals, plants and human societies. Arctic land is bordered by the subarctic. \n\nThe word 

Map:  88%|████████▊ | 2567/2901 [00:07<00:00, 387.93 examples/s]

{'id': '3tayzsbpll8425psm9hhik4gc90s2d', 'source': 'cnn', 'instruction': '(CNN) -- Thus far, NBC\'s late-night transition has been like butter. \n\nJay Leno hosted his final episode of "The Tonight Show" on February 6 with massive celebrity support and even bigger ratings, and Jimmy Fallon took over on February 17 with the same level of excitement. \n\nBut there\'s still one more piece of the Peacock\'s puzzle left to be added: Seth Meyers. \n\nAt 12:35 a.m., the 40-year-old "Saturday Night Live" star will step behind the desk of "Late Night" and occupy a chair once filled by David Letterman, Conan O\'Brien and, directly before him, his pal and fellow "Saturday Night Live" alum Fallon. \n\nWith that kind of history, it goes without saying that there\'s pressure on Meyers not to screw up. Yet instead of marching in and tearing down "Late Night\'s" past, Meyers is planning to keep tradition intact while playing to his strengths. \n\nAfter 12 years on "SNL," nine of which he served as hea

Map:  92%|█████████▏| 2663/2901 [00:07<00:00, 423.81 examples/s]

{'id': '3y9n9ss8lybnly2ttj0x6vn8iyfd3x', 'source': 'gutenberg', 'instruction': 'CHAPTER THREE. \n\nDESCRIBES THE DEED OF AN AMATEUR MATADOR AND THE WORK OF A ROUGH-AND-READY SHOEMAKER. \n\nWhen the day began to break Hockins awoke, and his first impulse was to shout "hold on!" Ebony\'s first action was to let go, thereby bringing himself to the ground with an awful thud, which would have told severely on any one less akin to india-rubber. \n\nFor a few minutes Mark Breezy, holding tight to his particular branch, looked down at his companions, yawned heavily, and smiled a little. Then a sudden impulse of memory caused him to look grave. \n\n"Come," he said, dropping lightly from his perch, "these natives may have been searching for us all night, and are perhaps nearer than we suppose. I vote that we push on at once." \n\n"Agreed," said Hockins, stretching himself. \n\n"No fear, Massa," remarked the negro. "If it wur moonlight dey might \'ave search, but whar de nights am dark dey knows 

Map:  95%|█████████▌| 2764/2901 [00:07<00:00, 457.51 examples/s]

{'id': '3glb5jmzfxvofaehoy7hppchmvldgq', 'source': 'race', 'instruction': "Oprah Winfrey has come a long way from her poor childhood home in a small Mississippi town. She was an unwanted child whose parents never married. She was brought up on her grandmother's farm. The possibility that she would become rich and famous was slim. \n\nOprah's mother left her child in her mother's care, so she could go to work in Milwaukee, Wisconsin. It was a strict and difficult life for Oprah. But it also led the way for her future. \n\nShe was a highly intelligent child. By the age of three, she had learned to read and write. She also made her first public appearance at that age. Oprah's intelligence was resented by other children of her age. They called her unkind names and pushed her away. Oprah felt very isolated and unwanted. It made her feel worse that she didn't live with her mother and father. She felt that no one loved her. This made her angry and rebellious . These feelings brought her much 

Map:  99%|█████████▉| 2866/2901 [00:07<00:00, 483.15 examples/s]

{'id': '3kopy89hm820ok2l3fm89tiln763j9', 'source': 'wikipedia', 'instruction': 'Nonverbal communication describes the process of conveying meaning in the form of non-word messages. Examples of nonverbal communication include haptic communication, chronemic communication, gestures, body language, facial expression, eye contact, and how one dresses. Nonverbal communication also relates to intent of a message. Examples of intent are voluntary, intentional movements like shaking a hand or winking, as well as involuntary, such as sweating. Speech also contains nonverbal elements known as paralanguage, e.g. rhythm, intonation, tempo, and stress. There may even be a pheromone component. Research has shown that up to 55% of human communication may occur through non-verbal facial expressions, and a further 38% through paralanguage. It affects communication most at the subconscious level and establishes trust. Likewise, written texts include nonverbal elements such as handwriting style, spatial 

                                                                

{'id': '3vw04l3zlt6dz2eo488x7if4569xxq', 'source': 'cnn', 'instruction': '(CNN)It\'s amazing what (literal) lengths some people will go to earn frequent flier miles. Take Ben Schlappig. Over his lifetime, he\'s accrued about four million miles. Half of those, he estimates, were earned on "mile runs" -- trips taken for the sole purpose of earning points. \n\nRecently, he completed a dizzying, non-stop trip (London-New York-Los Angeles-Honolulu-Los Angeles-London-Miami-London-Los Angeles-London) that would set most people\'s heads spinning. It took four days (three of which were spent almost completely on a plane), and he accrued 270,000 miles. \n\nHe admits that the journey was exhausting, but ultimately worth it. \n\n"To put it into some kind of perspective, it\'s enough miles for two first-class tickets between the U.S. and Asia," he notes. \n\n"If I would have paid cash for them, they would have each cost me over $20,000." \n\nDecember -- which marks the last opportunity for many fre



Dataset({
    features: ['id', 'source', 'instruction', 'output', 'instruction_length', 'output_length'],
    num_rows: 2901
})

In [7]:
# get maximize instruction_length in entire dataset
max_length = max(ds['instruction_length'])
max_length

1599

In [8]:
max(ds['output_length'])

1436